Rename QS8 GEMM/IGEMM/DWCONV microkernels

Include the requantization scheme (gemmlowp) in the microkernel name.
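
For example, a representative sketch of the rename pattern applied throughout the diff below
(only the requantization-scheme tag is inserted; the rest of the name is unchanged):

  xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane  -> xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane
  xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot      -> xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot

Test targets and sources follow the same convention, e.g. qs8-gemm-minmax-test with
test/qs8-gemm-minmax.cc becomes qs8-gemm-minmax-gemmlowp-test with
test/qs8-gemm-minmax-gemmlowp.cc.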

PiperOrigin-RevId: 375612878
diff --git a/BUILD.bazel b/BUILD.bazel
index a2d8cb6..a0899a4 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -6895,9 +6895,9 @@
 )
 
 xnnpack_unit_test(
-    name = "qs8_dwconv_minmax_test",
+    name = "qs8_dwconv_minmax_gemmlowp_test",
     srcs = [
-        "test/qs8-dwconv-minmax.cc",
+        "test/qs8-dwconv-minmax-gemmlowp.cc",
         "test/dwconv-microkernel-tester.h",
         "src/xnnpack/AlignedAllocator.h",
     ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
@@ -6915,10 +6915,10 @@
 )
 
 xnnpack_unit_test(
-    name = "qs8_gemm_minmax_test",
+    name = "qs8_gemm_minmax_gemmlowp_test",
     timeout = "moderate",
     srcs = [
-        "test/qs8-gemm-minmax.cc",
+        "test/qs8-gemm-minmax-gemmlowp.cc",
         "test/gemm-microkernel-tester.h",
         "src/xnnpack/AlignedAllocator.h",
     ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
@@ -6926,10 +6926,10 @@
 )
 
 xnnpack_unit_test(
-    name = "qs8_igemm_minmax_test",
+    name = "qs8_igemm_minmax_gemmlowp_test",
     timeout = "moderate",
     srcs = [
-        "test/qs8-igemm-minmax.cc",
+        "test/qs8-igemm-minmax-gemmlowp.cc",
         "test/gemm-microkernel-tester.h",
         "src/xnnpack/AlignedAllocator.h",
     ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e0c97c4..4da8bfc 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4619,23 +4619,23 @@
   TARGET_LINK_LIBRARIES(qs8-gavgpool-minmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(qs8-gavgpool-test qs8-gavgpool-minmax-test)
 
-  ADD_EXECUTABLE(qs8-gemm-minmax-test test/qs8-gemm-minmax.cc)
-  SET_TARGET_PROPERTIES(qs8-gemm-minmax-test PROPERTIES
+  ADD_EXECUTABLE(qs8-gemm-minmax-gemmlowp-test test/qs8-gemm-minmax-gemmlowp.cc)
+  SET_TARGET_PROPERTIES(qs8-gemm-minmax-gemmlowp-test PROPERTIES
     CXX_STANDARD 11
     CXX_STANDARD_REQUIRED YES
     CXX_EXTENSIONS YES)
-  TARGET_INCLUDE_DIRECTORIES(qs8-gemm-minmax-test PRIVATE src test)
-  TARGET_LINK_LIBRARIES(qs8-gemm-minmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
-  ADD_TEST(qs8-gemm-minmax-test qs8-gemm-minmax-test)
+  TARGET_INCLUDE_DIRECTORIES(qs8-gemm-minmax-gemmlowp-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(qs8-gemm-minmax-gemmlowp-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+  ADD_TEST(qs8-gemm-minmax-gemmlowp-test qs8-gemm-minmax-gemmlowp-test)
 
-  ADD_EXECUTABLE(qs8-igemm-minmax-test test/qs8-igemm-minmax.cc)
-  SET_TARGET_PROPERTIES(qs8-igemm-minmax-test PROPERTIES
+  ADD_EXECUTABLE(qs8-igemm-minmax-gemmlowp-test test/qs8-igemm-minmax-gemmlowp.cc)
+  SET_TARGET_PROPERTIES(qs8-igemm-minmax-gemmlowp-test PROPERTIES
     CXX_STANDARD 11
     CXX_STANDARD_REQUIRED YES
     CXX_EXTENSIONS YES)
-  TARGET_INCLUDE_DIRECTORIES(qs8-igemm-minmax-test PRIVATE src test)
-  TARGET_LINK_LIBRARIES(qs8-igemm-minmax-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
-  ADD_TEST(qs8-igemm-minmax-test qs8-igemm-minmax-test)
+  TARGET_INCLUDE_DIRECTORIES(qs8-igemm-minmax-gemmlowp-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(qs8-igemm-minmax-gemmlowp-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+  ADD_TEST(qs8-igemm-minmax-gemmlowp-test qs8-igemm-minmax-gemmlowp-test)
 
   ADD_EXECUTABLE(qs8-vadd-minmax-test test/qs8-vadd-minmax.cc)
   SET_TARGET_PROPERTIES(qs8-vadd-minmax-test PROPERTIES
diff --git a/bench/qs8-gemm-e2e.cc b/bench/qs8-gemm-e2e.cc
index 5199b96..dbcf5fd 100644
--- a/bench/qs8-gemm-e2e.cc
+++ b/bench/qs8-gemm-e2e.cc
@@ -76,2127 +76,2127 @@
 
 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       1 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
-  static void qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       1 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
-  static void qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
       1 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal,
       1 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
       1 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
       1 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55,
-      xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
-  static void qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32,
-      xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
-  static void qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64,
-      xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
       4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
       4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal,
       2 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal,
       2 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm,
-      xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm,
       2 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53,
       2 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53,
       2 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
       2 /* mr */, 8  /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64)
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64)
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53)
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal)
 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
       1 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
       1 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
       2 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
       2 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
       3 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
       3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
       4 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
       4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane,
       6 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane,
       6 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
       1 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
-  static void qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
       1 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
       2 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
       2 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
       3 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
       3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
       4 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
       4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm,
       6 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm,
       6 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
       1 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
       1 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
       2 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
       2 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
       3 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
       3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup,
       4 /* mr */, 8  /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup,
       4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
       1 /* mr */, 8  /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
       1 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
       2 /* mr */, 8  /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
       2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
       3 /* mr */, 8  /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
       3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup,
       4 /* mr */, 8  /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup,
       4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
       1 /* mr */, 8  /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
       1 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
       2 /* mr */, 8  /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
       2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
       3 /* mr */, 8  /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
       3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup,
       4 /* mr */, 8  /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup,
-      xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
-      xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup,
       4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
       1 /* mr */, 8  /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       1 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_4x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
       4 /* mr */, 8  /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 
-  static void qs8_gemm_minmax_ukernel_6x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
       6 /* mr */, 8  /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 
-  static void qs8_gemm_minmax_ukernel_6x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       6 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 
-  static void qs8_gemm_minmax_ukernel_8x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot,
       8 /* mr */, 8  /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 
-  static void qs8_gemm_minmax_ukernel_8x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot,
-      xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot,
-      xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot,
       8 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEONDOT);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
       1 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
       1 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
       2 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
       2 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
       3 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
       3 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal,
       4 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal,
       4 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
       1 /* mr */, 8  /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
       1 /* mr */, 16 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
       2 /* mr */, 8  /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
       2 /* mr */, 16 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
       3 /* mr */, 8  /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
       3 /* mr */, 16 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal,
       4 /* mr */, 8  /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal,
       4 /* mr */, 16 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
       1 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
       1 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
       2 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
       2 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
       3 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
       3 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal,
       4 /* mr */, 8  /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal,
       4 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckNEON);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c4__neondot);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c4__neondot);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8c4__neondot);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c4__neondot);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_6x8c4__neondot);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_6x16c4__neondot);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_8x8c4__neondot);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_8x16c4__neondot);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup);
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
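
 // Note on the pattern above (editorial, not part of the committed file): every hunk only
 // re-binds the same end-to-end wrapper to the renamed *_gemmlowp_* kernel symbols. A minimal
 // sketch of the wrapper's shape, inferred solely from the call sites in this diff; the exact
 // parameter type names below (xnn_qs8_gemm_minmax_ukernel_function, IsaCheckFunction) are
 // assumptions, not taken from this change.
 //
 //   static void GEMMEnd2EndBenchmark(
 //       benchmark::State& state, models::ExecutionPlanFactory model,
 //       xnn_qs8_gemm_minmax_ukernel_function gemm,    // main GEMM microkernel (mr rows)
 //       xnn_qs8_igemm_minmax_ukernel_function igemm,  // main IGEMM microkernel (mr rows)
 //       xnn_qs8_gemm_minmax_ukernel_function gemm1,   // 1-row GEMM fallback
 //       xnn_qs8_igemm_minmax_ukernel_function igemm1, // 1-row IGEMM fallback
 //       uint8_t mr, uint8_t nr, uint8_t log2_kr, uint8_t log2_sr,
 //       benchmark::utils::IsaCheckFunction isa_check = nullptr);
 //
 // The mr/nr/log2_kr/log2_sr arguments mirror the tile encoded in the kernel name
 // (e.g. 4x16c8 passes mr=4, nr=16, log2_kr=3), which is why the rename touches only
 // the function identifiers and leaves these parameters unchanged.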
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
       1 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX512F);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx,
-      xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
       2 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX512F);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_3x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx,
-      xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
       3 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX512F);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx,
-      xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx,
-      xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx,
-      xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx,
       4 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX512F);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2,
       1 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX2);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2,
-      xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2,
       2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX2);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2,
-      xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2,
-      xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2,
-      xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2,
       3 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX2);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckXOP);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckAVX);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSE41);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
       benchmark::utils::CheckSSSE3);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
       1 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
       2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
       3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128,
       4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
   }
 
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x16c8__avx512skx);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x16c8__avx512skx);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x16c8__avx512skx);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x16c8__avx512skx);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x8c8__avx2);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x8c8__avx2);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x8c8__avx2);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__xop_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__xop_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__xop_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__xop_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__xop_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__xop_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__xop_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__xop_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__xop_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__xop_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__xop_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__xop_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__xop_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__xop_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__avx_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__avx_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__avx_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__avx_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__avx_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__avx_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__avx_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__avx_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__avx_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__avx_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__avx_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__avx_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__avx_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__avx_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128);
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128);
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64);
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64);
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128);
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 #if XNN_ARCH_WASMSIMD
 #if XNN_ENABLE_FULL_BENCHMARKS
-  static void qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128,
       1 /* mr */, 4 /* nr */, 3 /* log2_kr */);
   }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128,
-      xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128,
       2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
   }
 
-  static void qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
+  static void qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
     GEMMEnd2EndBenchmark(state, model,
-      xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128,
-      xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128,
-      xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
-      xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128,
+      xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128,
+      xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128,
       3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
   }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128)
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
-  BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64)
+  BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128)
 #endif  // XNN_ARCH_WASMSIMD
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-static void qs8_gemm_minmax_ukernel_1x2__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
+static void qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,
-    xnn_qs8_gemm_minmax_ukernel_1x2__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x2__scalar,
-    xnn_qs8_gemm_minmax_ukernel_1x2__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x2__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar,
     1 /* mr */, 2 /* nr */);
 }
 
-static void qs8_gemm_minmax_ukernel_1x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
+static void qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,
-    xnn_qs8_gemm_minmax_ukernel_1x4__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x4__scalar,
-    xnn_qs8_gemm_minmax_ukernel_1x4__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x4__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar,
     1 /* mr */, 4 /* nr */);
 }
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
 
-static void qs8_gemm_minmax_ukernel_2x2__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
+static void qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,
-    xnn_qs8_gemm_minmax_ukernel_2x2__scalar,
-    xnn_qs8_igemm_minmax_ukernel_2x2__scalar,
-    xnn_qs8_gemm_minmax_ukernel_1x2__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x2__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar,
     2 /* mr */, 2 /* nr */);
 }
 
-static void qs8_gemm_minmax_ukernel_3x2__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
+static void qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,
-    xnn_qs8_gemm_minmax_ukernel_3x2__scalar,
-    xnn_qs8_igemm_minmax_ukernel_3x2__scalar,
-    xnn_qs8_gemm_minmax_ukernel_1x2__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x2__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar,
     3 /* mr */, 2 /* nr */);
 }
 
-static void qs8_gemm_minmax_ukernel_4x2__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
+static void qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,
-    xnn_qs8_gemm_minmax_ukernel_4x2__scalar,
-    xnn_qs8_igemm_minmax_ukernel_4x2__scalar,
-    xnn_qs8_gemm_minmax_ukernel_1x2__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x2__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar,
     4 /* mr */, 2 /* nr */);
 }
 
-static void qs8_gemm_minmax_ukernel_2x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
+static void qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,
-    xnn_qs8_gemm_minmax_ukernel_2x4__scalar,
-    xnn_qs8_igemm_minmax_ukernel_2x4__scalar,
-    xnn_qs8_gemm_minmax_ukernel_1x4__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x4__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar,
     2 /* mr */, 4 /* nr */);
 }
 
-static void qs8_gemm_minmax_ukernel_3x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
+static void qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,
-    xnn_qs8_gemm_minmax_ukernel_3x4__scalar,
-    xnn_qs8_igemm_minmax_ukernel_3x4__scalar,
-    xnn_qs8_gemm_minmax_ukernel_1x4__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x4__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar,
     3 /* mr */, 4 /* nr */);
 }
 
-static void qs8_gemm_minmax_ukernel_4x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
+static void qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
   GEMMEnd2EndBenchmark(state, model,
-    xnn_qs8_gemm_minmax_ukernel_4x4__scalar,
-    xnn_qs8_igemm_minmax_ukernel_4x4__scalar,
-    xnn_qs8_gemm_minmax_ukernel_1x4__scalar,
-    xnn_qs8_igemm_minmax_ukernel_1x4__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar,
+    xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar,
+    xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar,
     4 /* mr */, 4 /* nr */);
 }
 
 #if XNN_ENABLE_FULL_BENCHMARKS
-BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x2__scalar)
-BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_1x4__scalar)
+BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar)
+BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar)
 #endif  // XNN_ENABLE_FULL_BENCHMARKS
-BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x2__scalar)
-BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x2__scalar)
-BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x2__scalar)
-BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_2x4__scalar)
-BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_3x4__scalar)
-BENCHMARK_QS8_END2END(qs8_gemm_minmax_ukernel_4x4__scalar)
+BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar)
+BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar)
+BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar)
+BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar)
+BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar)
+BENCHMARK_QS8_END2END(qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar)
 
 #ifndef XNNPACK_BENCHMARK_NO_MAIN
 BENCHMARK_MAIN();
diff --git a/bench/qs8-gemm.cc b/bench/qs8-gemm.cc
index 0bf89b9..1009b5b 100644
--- a/bench/qs8-gemm.cc
+++ b/bench/qs8-gemm.cc
@@ -213,307 +213,307 @@
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
   static void qs8_gemm_1x8__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, 1, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, 1, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, 2, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, 2, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, 3, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, 3, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, 4, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, 4, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_6x8__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, 6, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, 6, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, 1, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, 1, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, 2, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, 2, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, 3, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, 3, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, 4, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, 4, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_6x16__neon_mlal_lane(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, 6, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, 6, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, 1, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, 1, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, 2, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, 2, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, 3, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, 3, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, 4, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, 4, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_6x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, 6, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, 6, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, 1, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, 1, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, 2, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, 2, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, 3, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, 3, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, 4, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, 4, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_6x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, 6, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, 6, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, 1, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, 1, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, 2, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, 2, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, 3, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, 3, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, 4, 8, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, 4, 8, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, 1, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, 1, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, 2, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, 2, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, 3, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, 3, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, 4, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, 4, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, 1, 8, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, 1, 8, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, 2, 8, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, 2, 8, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, 3, 8, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, 3, 8, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, 4, 8, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, 4, 8, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, 1, 16, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, 1, 16, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, 2, 16, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, 2, 16, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, 3, 16, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, 3, 16, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, 4, 16, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, 4, 16, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, 1, 8, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, 1, 8, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, 2, 8, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, 2, 8, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, 3, 8, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, 3, 8, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, 4, 8, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, 4, 8, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, 1, 16, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, 1, 16, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, 2, 16, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, 2, 16, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, 3, 16, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, 3, 16, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, 4, 16, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, 4, 16, 2, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, 1, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, 1, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, 2, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c8__neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, 3, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, 3, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c8__neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, 4, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, 4, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c8__neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, 1, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, 1, 16, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c8__neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, 2, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, 2, 16, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c8__neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, 3, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, 3, 16, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c8__neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, 4, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, 4, 16, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, 1, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, 1, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, 2, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c8__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, 3, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, 3, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c8__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, 4, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, 4, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c8__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, 1, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, 1, 16, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c8__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, 2, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, 2, 16, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c8__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, 3, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, 3, 16, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c8__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, 4, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, 4, 16, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c16__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, 1, 8, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, 1, 8, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c16__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, 2, 8, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, 2, 8, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c16__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, 3, 8, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, 3, 8, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c16__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, 4, 8, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, 4, 8, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c16__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, 1, 16, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, 1, 16, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c16__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, 2, 16, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, 2, 16, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c16__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, 3, 16, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, 3, 16, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c16__neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, 4, 16, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, 4, 16, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c4__neondot(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, 1, 8, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, 1, 8, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x8c4__neondot(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, 4, 8, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, 4, 8, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_6x8c4__neondot(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, 6, 8, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, 6, 8, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_8x8c4__neondot(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, 8, 8, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, 8, 8, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_1x16c4__neondot(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, 1, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, 1, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x16c4__neondot(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, 4, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, 4, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_6x16c4__neondot(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, 6, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, 6, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_8x16c4__neondot(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, 8, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, 8, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
 
@@ -598,71 +598,71 @@
 
 #if XNN_ARCH_ARM64
   static void qs8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, 4, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, 4, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_1x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, 1, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, 1, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_1x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, 1, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, 1, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, 4, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, 4, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, 4, 16, 4, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, 4, 16, 4, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, 4, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, 4, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, 4, 16, 1, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, 4, 16, 1, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__aarch64_neon_mlal_padal_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, 1, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, 1, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__aarch64_neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, 1, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, 1, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__aarch64_neon_mlal_padal_cortex_a53(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, 1, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, 1, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, 1, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, 1, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mull_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, 2, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, 2, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal_prfm(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, 2, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal_cortex_a53(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, 2, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, 2, 8, 8, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c16__aarch64_neon_mlal_padal(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, 2, 8, 16, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, 2, 8, 16, 1,
       xnn_init_qs8_gemm_neon_params, benchmark::utils::CheckNEON);
   }
 
@@ -688,363 +688,363 @@
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
   static void qs8_gemm_2x16c8__avx512skx(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, 2, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, 2, 16, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX512SKX);
   }
   static void qs8_gemm_3x16c8__avx512skx(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, 3, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, 3, 16, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX512SKX);
   }
   static void qs8_gemm_4x16c8__avx512skx(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, 4, 16, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, 4, 16, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX512SKX);
   }
 
   static void qs8_gemm_2x8c8__avx2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, 2, 8, 8, 1,
       xnn_init_qs8_gemm_avx2_params, benchmark::utils::CheckAVX2);
   }
   static void qs8_gemm_3x8c8__avx2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, 3, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, 3, 8, 8, 1,
       xnn_init_qs8_gemm_avx2_params, benchmark::utils::CheckAVX2);
   }
 
   static void qs8_gemm_xw_2x8c8__avx2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, 2, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, 2, 8, 8, 1,
       xnn_init_qs8_gemm_avx2_params, benchmark::utils::CheckAVX2, true);
   }
   static void qs8_gemm_xw_3x8c8__avx2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, 3, 8, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, 3, 8, 8, 1,
       xnn_init_qs8_gemm_avx2_params, benchmark::utils::CheckAVX2, true);
   }
 
   static void qs8_gemm_2x4c2__xop_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_3x4c2__xop_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_4x4c2__xop_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
 
   static void qs8_gemm_2x4c2__xop_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_3x4c2__xop_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_4x4c2__xop_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
 
   static void qs8_gemm_xw_2x4c2__xop(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__xop, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__xop, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP, true);
   }
   static void qs8_gemm_xw_3x4c2__xop(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__xop, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__xop, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP, true);
   }
   static void qs8_gemm_xw_4x4c2__xop(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP, true);
   }
 
   static void qs8_gemm_2x4c8__xop_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_3x4c8__xop_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
 
   static void qs8_gemm_2x4c8__xop_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_3x4c8__xop_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP);
   }
 
   static void qs8_gemm_xw_2x4c8__xop(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP, true);
   }
   static void qs8_gemm_xw_3x4c8__xop(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckXOP, true);
   }
 
   static void qs8_gemm_2x4c2__avx_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_3x4c2__avx_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_4x4c2__avx_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
 
   static void qs8_gemm_2x4c2__avx_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_3x4c2__avx_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_4x4c2__avx_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
 
   static void qs8_gemm_xw_2x4c2__avx(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__avx, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__avx, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX, true);
   }
   static void qs8_gemm_xw_3x4c2__avx(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__avx, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__avx, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX, true);
   }
   static void qs8_gemm_xw_4x4c2__avx(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX, true);
   }
 
   static void qs8_gemm_2x4c8__avx_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_3x4c8__avx_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
 
   static void qs8_gemm_2x4c8__avx_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_3x4c8__avx_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX);
   }
 
   static void qs8_gemm_xw_2x4c8__avx(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX, true);
   }
   static void qs8_gemm_xw_3x4c8__avx(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckAVX, true);
   }
 
   static void qs8_gemm_2x4c2__sse41_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_3x4c2__sse41_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_4x4c2__sse41_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
 
   static void qs8_gemm_2x4c2__sse41_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_3x4c2__sse41_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_4x4c2__sse41_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
 
   static void qs8_gemm_xw_2x4c2__sse41(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__sse41, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse41, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41, true);
   }
   static void qs8_gemm_xw_3x4c2__sse41(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__sse41, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse41, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41, true);
   }
   static void qs8_gemm_xw_4x4c2__sse41(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41, true);
   }
 
   static void qs8_gemm_2x4c8__sse41_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_3x4c8__sse41_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
 
   static void qs8_gemm_2x4c8__sse41_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_3x4c8__sse41_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41);
   }
 
   static void qs8_gemm_xw_2x4c8__sse41(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41, true);
   }
   static void qs8_gemm_xw_3x4c8__sse41(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse4_params, benchmark::utils::CheckSSE41, true);
   }
 
   static void qs8_gemm_2x4c2__ssse3_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_3x4c2__ssse3_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_4x4c2__ssse3_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
 
   static void qs8_gemm_2x4c2__ssse3_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_3x4c2__ssse3_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_4x4c2__ssse3_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
 
   static void qs8_gemm_xw_2x4c2__ssse3(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__ssse3, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__ssse3, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
   static void qs8_gemm_xw_3x4c2__ssse3(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__ssse3, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__ssse3, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
   static void qs8_gemm_xw_4x4c2__ssse3(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
 
   static void qs8_gemm_2x4c8__ssse3_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_3x4c8__ssse3_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
 
   static void qs8_gemm_2x4c8__ssse3_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_3x4c8__ssse3_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3);
   }
 
   static void qs8_gemm_xw_2x4c8__ssse3(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
   static void qs8_gemm_xw_3x4c8__ssse3(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
 
   static void qs8_gemm_2x4c2__sse2_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
   static void qs8_gemm_3x4c2__sse2_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
   static void qs8_gemm_4x4c2__sse2_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
 
   static void qs8_gemm_2x4c2__sse2_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
   static void qs8_gemm_3x4c2__sse2_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
   static void qs8_gemm_4x4c2__sse2_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
 
   static void qs8_gemm_xw_2x4c2__sse2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__sse2, 2, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse2, 2, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, nullptr, true);
   }
   static void qs8_gemm_xw_3x4c2__sse2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__sse2, 3, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse2, 3, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, nullptr, true);
   }
   static void qs8_gemm_xw_4x4c2__sse2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, 4, 4, 2, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, 4, 4, 2, 1,
       xnn_init_qs8_gemm_sse2_params, nullptr, true);
   }
 
   static void qs8_gemm_2x4c8__sse2_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
   static void qs8_gemm_3x4c8__sse2_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
 
   static void qs8_gemm_2x4c8__sse2_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
   static void qs8_gemm_3x4c8__sse2_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params);
   }
 
   static void qs8_gemm_xw_2x4c8__sse2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, 2, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params, nullptr, true);
   }
   static void qs8_gemm_xw_3x4c8__sse2(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, 3, 4, 8, 1,
       xnn_init_qs8_gemm_sse2_params, nullptr, true);
   }
 
@@ -1141,29 +1141,29 @@
 
 #if XNN_ARCH_WASMSIMD
   static void qs8_gemm_2x4c8__wasmsimd_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, 2, 4, 8, 1,
       xnn_init_qs8_gemm_wasmsimd_params);
   }
   static void qs8_gemm_3x4c8__wasmsimd_ld64(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, 3, 4, 8, 1,
       xnn_init_qs8_gemm_wasmsimd_params);
   }
 
   static void qs8_gemm_2x4c8__wasmsimd_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, 2, 4, 8, 1,
       xnn_init_qs8_gemm_wasmsimd_params);
   }
   static void qs8_gemm_3x4c8__wasmsimd_ld128(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, 3, 4, 8, 1,
       xnn_init_qs8_gemm_wasmsimd_params);
   }
 
   static void qs8_gemm_xw_2x4c8__wasmsimd(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, 2, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, 2, 4, 8, 1,
       xnn_init_qs8_gemm_wasmsimd_params, nullptr, true);
   }
   static void qs8_gemm_xw_3x4c8__wasmsimd(benchmark::State& state, const char* net) {
-    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, 3, 4, 8, 1,
+    GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, 3, 4, 8, 1,
       xnn_init_qs8_gemm_wasmsimd_params, nullptr, true);
   }
 
@@ -1177,28 +1177,28 @@
 
 
 static void qs8_gemm_2x2__scalar(benchmark::State& state, const char* net) {
-  GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x2__scalar, 2, 2, 1, 1,
+  GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, 2, 2, 1, 1,
     xnn_init_qs8_gemm_scalar_params);
 }
 static void qs8_gemm_3x2__scalar(benchmark::State& state, const char* net) {
-  GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x2__scalar, 3, 2, 1, 1,
+  GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, 3, 2, 1, 1,
     xnn_init_qs8_gemm_scalar_params);
 }
 static void qs8_gemm_4x2__scalar(benchmark::State& state, const char* net) {
-  GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x2__scalar, 4, 2, 1, 1,
+  GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, 4, 2, 1, 1,
     xnn_init_qs8_gemm_scalar_params);
 }
 
 static void qs8_gemm_2x4__scalar(benchmark::State& state, const char* net) {
-  GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_2x4__scalar, 2, 4, 1, 1,
+  GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, 2, 4, 1, 1,
     xnn_init_qs8_gemm_scalar_params);
 }
 static void qs8_gemm_3x4__scalar(benchmark::State& state, const char* net) {
-  GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_3x4__scalar, 3, 4, 1, 1,
+  GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, 3, 4, 1, 1,
     xnn_init_qs8_gemm_scalar_params);
 }
 static void qs8_gemm_4x4__scalar(benchmark::State& state, const char* net) {
-  GEMMBenchmark(state, xnn_qs8_gemm_minmax_ukernel_4x4__scalar, 4, 4, 1, 1,
+  GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, 4, 4, 1, 1,
     xnn_init_qs8_gemm_scalar_params);
 }
 
diff --git a/scripts/generate-qs8-dwconv.sh b/scripts/generate-qs8-dwconv.sh
index bb1e2a8..9574396 100755
--- a/scripts/generate-qs8-dwconv.sh
+++ b/scripts/generate-qs8-dwconv.sh
@@ -115,4 +115,4 @@
 tools/xngen src/qs8-dwconv/unipass-avx512skx-mul32.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=25 -o src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c
 
 ################################## Unit tests #################################
-tools/generate-dwconv-test.py --spec test/qs8-dwconv-minmax.yaml --output test/qs8-dwconv-minmax.cc
+tools/generate-dwconv-test.py --spec test/qs8-dwconv-minmax-gemmlowp.yaml --output test/qs8-dwconv-minmax-gemmlowp.cc
diff --git a/scripts/generate-qs8-gemm.sh b/scripts/generate-qs8-gemm.sh
index bb740de..8665494 100755
--- a/scripts/generate-qs8-gemm.sh
+++ b/scripts/generate-qs8-gemm.sh
@@ -293,4 +293,4 @@
 tools/xngen src/qs8-gemm/MRx16c8-avx512skx.c.in -D MR=4 -D VARIANT=LD256 -o src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
 
 ################################## Unit tests #################################
-tools/generate-gemm-test.py --spec test/qs8-gemm-minmax.yaml --output test/qs8-gemm-minmax.cc
+tools/generate-gemm-test.py --spec test/qs8-gemm-minmax-gemmlowp.yaml --output test/qs8-gemm-minmax-gemmlowp.cc
diff --git a/scripts/generate-qs8-igemm.sh b/scripts/generate-qs8-igemm.sh
index c685af5..a8fdfaa 100755
--- a/scripts/generate-qs8-igemm.sh
+++ b/scripts/generate-qs8-igemm.sh
@@ -240,4 +240,4 @@
 tools/xngen src/qs8-igemm/MRx16c8-avx512skx.c.in -D MR=4 -D VARIANT=LD256 -o src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
 
 ################################## Unit tests #################################
-tools/generate-gemm-test.py --spec test/qs8-igemm-minmax.yaml --output test/qs8-igemm-minmax.cc
+tools/generate-gemm-test.py --spec test/qs8-igemm-minmax-gemmlowp.yaml --output test/qs8-igemm-minmax-gemmlowp.cc
diff --git a/src/init.c b/src/init.c
index 14d2ca6..a952d30 100644
--- a/src/init.c
+++ b/src/init.c
@@ -110,30 +110,30 @@
       init_flags |= XNN_INIT_FLAG_QS8;
 
       if (!XNN_PLATFORM_IOS && cpuinfo_has_arm_neon_dot()) {
-        xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot);
-        xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot);
-        xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot);
-        xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot);
+        xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot);
+        xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot);
+        xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot);
+        xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot);
         xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
         xnn_params.qs8.gemm.mr = 4;
         xnn_params.qs8.gemm.nr = 8;
         xnn_params.qs8.gemm.log2_kr = 2;
       } else {
-        xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
-        xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
-        xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
-        xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
+        xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
+        xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
+        xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
+        xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
         xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
         xnn_params.qs8.gemm.mr = 2;
         xnn_params.qs8.gemm.nr = 8;
         xnn_params.qs8.gemm.log2_kr = 1;
       }
 
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16;
       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_neon_params;
       xnn_params.qs8.dwconv[0].channel_tile = 8;
       xnn_params.qs8.dwconv[0].primary_tile = 9;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16;
       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_neon_params;
       xnn_params.qs8.dwconv[1].channel_tile = 8;
       xnn_params.qs8.dwconv[1].primary_tile = 25;
@@ -841,19 +841,19 @@
     #if XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
       #if XNN_ENABLE_ASSEMBLY
         if (cpuinfo_has_arm_neon_dot()) {
-          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64);
-          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot);
-          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64);
-          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64);
+          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64);
+          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
           xnn_params.qs8.gemm.mr = 4;
           xnn_params.qs8.gemm.nr = 16;
           xnn_params.qs8.gemm.log2_kr = 2;
         } else {
-          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
-          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
-          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal);
-          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal);
+          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal);
+          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal);
+          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal);
+          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal);
           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
           xnn_params.qs8.gemm.mr = 2;
           xnn_params.qs8.gemm.nr = 8;
@@ -861,19 +861,19 @@
         }
       #else  // !XNN_ENABLE_ASSEMBLY
         if (cpuinfo_has_arm_neon_dot()) {
-          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot);
-          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot);
-          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot);
-          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
           xnn_params.qs8.gemm.mr = 4;
           xnn_params.qs8.gemm.nr = 16;
           xnn_params.qs8.gemm.log2_kr = 2;
         } else {
-          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
-          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
-          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
-          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
+          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
+          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
+          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
+          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
           xnn_params.qs8.gemm.mr = 2;
           xnn_params.qs8.gemm.nr = 8;
@@ -885,16 +885,16 @@
         if (cpuinfo_has_arm_neon_dot()) {
           switch (cpuinfo_get_core(0)->uarch) {
             case cpuinfo_uarch_cortex_a55:
-              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55);
-              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55);
+              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55);
+              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55);
               break;
             default:
-              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64);
-              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64);
+              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64);
+              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64);
               break;
           }
-          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot);
-          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
           xnn_params.qs8.gemm.mr = 4;
           xnn_params.qs8.gemm.nr = 16;
@@ -903,10 +903,10 @@
           switch (cpuinfo_get_core(0)->uarch) {
             case cpuinfo_uarch_cortex_a53:
             case cpuinfo_uarch_cortex_a55r0:
-              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
-              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
-              xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane);
-              xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane);
+              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
+              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
+              xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane);
+              xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane);
               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
               xnn_params.qs8.gemm.mr = 4;
               xnn_params.qs8.gemm.nr = 16;
@@ -915,10 +915,10 @@
             case cpuinfo_uarch_cortex_a72:
             case cpuinfo_uarch_cortex_a73:
             case cpuinfo_uarch_kryo:
-              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm);
-              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm);
-              xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm);
-              xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm);
+              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm);
+              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm);
+              xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm);
+              xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm);
               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
               xnn_params.qs8.gemm.mr = 2;
               xnn_params.qs8.gemm.nr = 8;
@@ -926,10 +926,10 @@
               break;
 
             default:
-              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
-              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal);
-              xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal);
-              xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal);
+              xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal);
+              xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal);
+              xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal);
+              xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal);
               xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
               xnn_params.qs8.gemm.mr = 2;
               xnn_params.qs8.gemm.nr = 8;
@@ -954,19 +954,19 @@
               case cpuinfo_uarch_cortex_a53:
               case cpuinfo_uarch_cortex_a55r0:
                 if (mr == 2 && nr == 8 && log2_kr == 3) {
-                  xnn_params.qs8.gemm.minmax.gemm.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53;
-                  xnn_params.qs8.gemm.minmax.igemm.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53;
-                  xnn_params.qs8.gemm.minmax.gemm1.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53;
-                  xnn_params.qs8.gemm.minmax.igemm1.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53;
+                  xnn_params.qs8.gemm.minmax.gemm.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53;
+                  xnn_params.qs8.gemm.minmax.igemm.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53;
+                  xnn_params.qs8.gemm.minmax.gemm1.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53;
+                  xnn_params.qs8.gemm.minmax.igemm1.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53;
                 }
                 break;
 
               case cpuinfo_uarch_cortex_a55:
                 if (mr == 4 && nr == 16 && log2_kr == 2) {
-                  xnn_params.qs8.gemm.minmax.gemm.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55;
-                  xnn_params.qs8.gemm.minmax.igemm.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55;
-                  xnn_params.qs8.gemm.minmax.gemm1.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot;
-                  xnn_params.qs8.gemm.minmax.igemm1.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot;
+                  xnn_params.qs8.gemm.minmax.gemm.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55;
+                  xnn_params.qs8.gemm.minmax.igemm.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55;
+                  xnn_params.qs8.gemm.minmax.gemm1.function[i] = (xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot;
+                  xnn_params.qs8.gemm.minmax.igemm1.function[i] = (xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot;
                 }
                 break;
               default:
@@ -977,19 +977,19 @@
         #endif  // XNN_MAX_UARCH_TYPES > 1
       #else  // !XNN_ENABLE_ASSEMBLY
         if (cpuinfo_has_arm_neon_dot()) {
-          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot);
-          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot);
-          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot);
-          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot);
+          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
           xnn_params.qs8.gemm.mr = 4;
           xnn_params.qs8.gemm.nr = 16;
           xnn_params.qs8.gemm.log2_kr = 2;
         } else {
-          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
-          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup);
-          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
-          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup);
+          xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
+          xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
+          xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
+          xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
           xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_neon_params;
           xnn_params.qs8.gemm.mr = 2;
           xnn_params.qs8.gemm.nr = 8;
@@ -998,11 +998,11 @@
       #endif  // XNN_ENABLE_ASSEMBLY
     #endif  // XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
 
-    xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16;
+    xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16;
     xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_neon_params;
     xnn_params.qs8.dwconv[0].channel_tile = 8;
     xnn_params.qs8.dwconv[0].primary_tile = 9;
-    xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16;
+    xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16;
     xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_neon_params;
     xnn_params.qs8.dwconv[1].channel_tile = 8;
     xnn_params.qs8.dwconv[1].primary_tile = 25;
@@ -1560,65 +1560,65 @@
     init_flags |= XNN_INIT_FLAG_QS8;
 
     if (cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.gemm.mr = 4;
       xnn_params.qs8.gemm.nr = 16;
       xnn_params.qs8.gemm.log2_kr = 3;
     } else if (cpuinfo_has_x86_xop()) {
       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.gemm.mr = 2;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
     } else if (cpuinfo_has_x86_avx2()) {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_avx2_params;
       xnn_params.qs8.gemm.mr = 3;
       xnn_params.qs8.gemm.nr = 8;
       xnn_params.qs8.gemm.log2_kr = 3;
     } else if (cpuinfo_has_x86_avx()) {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.gemm.mr = 2;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
     } else if (cpuinfo_has_x86_sse4_1()) {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.gemm.mr = 3;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
     } else if (cpuinfo_has_x86_ssse3()) {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_sse2_params;
       xnn_params.qs8.gemm.mr = 3;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
     } else {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_sse2_params;
       xnn_params.qs8.gemm.mr = 3;
       xnn_params.qs8.gemm.nr = 4;
@@ -1626,53 +1626,53 @@
     }
 
     if (cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32;
       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_sse2_params;
       xnn_params.qs8.dwconv[0].channel_tile = 32;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32;
       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_sse2_params;
       xnn_params.qs8.dwconv[1].channel_tile = 32;
     } else if (cpuinfo_has_x86_xop()) {
       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32;
       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.dwconv[0].channel_tile = 16;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32;
       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.dwconv[1].channel_tile = 16;
     } else if (cpuinfo_has_x86_avx2()) {
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32;
       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_avx2_params;
       xnn_params.qs8.dwconv[0].channel_tile = 16;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32;
       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_avx2_params;
       xnn_params.qs8.dwconv[1].channel_tile = 16;
     } else if (cpuinfo_has_x86_avx()) {
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32;
       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.dwconv[0].channel_tile = 16;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32;
       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.dwconv[1].channel_tile = 16;
     } else if (cpuinfo_has_x86_sse4_1()) {
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16;
       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.dwconv[0].channel_tile = 8;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16;
       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_sse4_params;
       xnn_params.qs8.dwconv[1].channel_tile = 8;
     } else if (cpuinfo_has_x86_ssse3()) {
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16;
       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_sse2_params;
       xnn_params.qs8.dwconv[0].channel_tile = 8;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16;
       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_sse2_params;
       xnn_params.qs8.dwconv[1].channel_tile = 8;
     } else if (cpuinfo_has_x86_sse2()) {
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16;
       xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_sse2_params;
       xnn_params.qs8.dwconv[0].channel_tile = 8;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16;
       xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_sse2_params;
       xnn_params.qs8.dwconv[1].channel_tile = 8;
     }
@@ -2302,20 +2302,20 @@
   #ifndef XNN_NO_QS8_OPERATORS
     init_flags |= XNN_INIT_FLAG_QS8;
 
-    xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64);
-    xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64);
-    xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64);
-    xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64);
+    xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64);
+    xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64);
+    xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64);
+    xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64);
     xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_wasmsimd_params;
     xnn_params.qs8.gemm.mr = 3;
     xnn_params.qs8.gemm.nr = 4;
     xnn_params.qs8.gemm.log2_kr = 3;
 
-    xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16;
+    xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16;
     xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_wasmsimd_params;
     xnn_params.qs8.dwconv[0].channel_tile = 8;
     xnn_params.qs8.dwconv[0].primary_tile = 9;
-    xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16;
+    xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16;
     xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_wasmsimd_params;
     xnn_params.qs8.dwconv[1].channel_tile = 8;
     xnn_params.qs8.dwconv[1].primary_tile = 25;
@@ -2839,28 +2839,28 @@
     init_flags |= XNN_INIT_FLAG_QS8;
 
     if (is_wasm_x86) {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_2x2__scalar);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_2x2__scalar);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x2__scalar);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x2__scalar);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_scalar_params;
       xnn_params.qs8.gemm.mr = 2;
       xnn_params.qs8.gemm.nr = 2;
     } else {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_4x4__scalar);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_4x4__scalar);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x4__scalar);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x4__scalar);
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar);
       xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_scalar_params;
       xnn_params.qs8.gemm.mr = 4;
       xnn_params.qs8.gemm.nr = 4;
     }
 
-    xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar;
+    xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar;
     xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_scalar_params;
     xnn_params.qs8.dwconv[0].channel_tile = 2;
     xnn_params.qs8.dwconv[0].primary_tile = 9;
-    xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar;
+    xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar;
     xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_scalar_params;
     xnn_params.qs8.dwconv[1].channel_tile = 2;
     xnn_params.qs8.dwconv[1].primary_tile = 25;
@@ -3225,19 +3225,19 @@
   #ifndef XNN_NO_QS8_OPERATORS
     init_flags |= XNN_INIT_FLAG_QS8;
 
-    xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_3x4__scalar);
-    xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_3x4__scalar);
-    xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_ukernel_1x4__scalar);
-    xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_ukernel_1x4__scalar);
+    xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar);
+    xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar);
+    xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar);
+    xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar);
     xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_gemm_scalar_params;
     xnn_params.qs8.gemm.mr = 3;
     xnn_params.qs8.gemm.nr = 4;
 
-    xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar;
+    xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar;
     xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_gemm_scalar_params;
     xnn_params.qs8.dwconv[0].channel_tile = 2;
     xnn_params.qs8.dwconv[0].primary_tile = 9;
-    xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar;
+    xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar;
     xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_gemm_scalar_params;
     xnn_params.qs8.dwconv[1].channel_tile = 2;
     xnn_params.qs8.dwconv[1].primary_tile = 25;
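
The kernels renamed in the generated sources below all share the GEMMLOWP requantization scheme that the new `_gemmlowp_` infix refers to: the int32 accumulator is scaled by a Q31 fixed-point multiplier and then shifted right with a remainder/threshold correction before the zero-point add and min/max clamp. The scalar sketch below is only an illustration of that scheme under the usual assumptions (multiplier in Q31, shift in [0, 31], threshold equal to half the remainder mask, arithmetic right shift of negative int32); the helper names are hypothetical and not part of XNNPACK.

  #include <stdint.h>

  // Q31 fixed-point multiply with rounding: (acc * multiplier + 2^30) >> 31.
  static int32_t q31_mulhi_rounded(int32_t acc, int32_t multiplier) {
    const int64_t product = (int64_t) acc * (int64_t) multiplier;
    return (int32_t) ((product + INT64_C(0x40000000)) >> 31);
  }

  // GEMMLOWP-style requantization: rounding right shift of the Q31 product
  // using the remainder mask/threshold correction seen in the SIMD kernels,
  // followed by zero-point addition and clamping to [qmin, qmax].
  static int8_t gemmlowp_requantize(int32_t acc, int32_t multiplier, uint32_t shift,
                                    int32_t zero_point, int32_t qmin, int32_t qmax) {
    const int32_t q31prod = q31_mulhi_rounded(acc, multiplier);
    const int32_t remainder_mask = (int32_t) ((UINT32_C(1) << shift) - 1);
    const int32_t remainder = (q31prod & remainder_mask) - (int32_t) (q31prod < 0);
    const int32_t threshold = remainder_mask >> 1;
    int32_t q = (q31prod >> shift) + (int32_t) (remainder > threshold);
    q += zero_point;
    if (q < qmin) q = qmin;
    if (q > qmax) q = qmax;
    return (int8_t) q;
  }
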
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul16.c
index 201e81c..9a333f4 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul32.c
index 715c5ab..a067c31 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
index 15c168f..713c60d 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
index 37d3bcc..e0161bb 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
@@ -417,7 +417,7 @@
         _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
       const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
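
Besides the rename, the hunk above narrows the shift load from `_mm_load_si128` to `_mm_loadl_epi64`; the same substitution appears in the other avx2-mul32 kernels later in this patch. This is safe for this use because `_mm256_sra_epi32` (VPSRAD) takes its count from the low 64 bits of the xmm operand, so only 8 bytes of `params->avx2.shift` are ever consumed. A minimal sketch of the pattern follows; the helper name and pointer parameter are illustrative only.

  #include <immintrin.h>
  #include <stdint.h>

  // _mm256_sra_epi32 reads its shift count from the low 64 bits of the count
  // operand, so an 8-byte load (zero-extended to 128 bits by _mm_loadl_epi64)
  // is sufficient; the wider 16-byte load read bytes the shift never uses.
  static inline __m256i sra_epi32_by_field(__m256i v, const uint64_t* shift_field) {
    const __m128i vshift = _mm_loadl_epi64((const __m128i*) shift_field);
    return _mm256_sra_epi32(v, vshift);
  }
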
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx512skx-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx512skx-mul32.c
index 5cdbe0f..864e168 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx512skx-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx512skx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c
index c5e4158..d8715ce 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse2-mul16.c
index 13a5160..569cc79 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul16.c
index 16037c0..52f071a 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul32.c
index f332747..3343cc4 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-ssse3-mul16.c
index 938d0e5..8fa383e 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-ssse3-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-wasmsimd-mul16.c
index 85135de..78c11a6 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-wasmsimd-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-xop-mul32.c
index 9dc6211..aebaf30 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-xop-mul32.c
@@ -20,7 +20,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul16.c
index a8bb56a..8bac6f5 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul32.c
index 45c2fa8..555f739 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
index b1db9e6..e478bae 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
index b03ec06..ae760d2 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
@@ -193,7 +193,7 @@
         _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
       const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c
index 78c542a..c789d11 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c
index 6daee4f..c11dc32 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse2-mul16.c
index 96df3cd..ec641b7 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul16.c
index eadc573..a6f5b2f 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul32.c
index 580cfb2..d6e298b 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-ssse3-mul16.c
index 3d148fe..3dc45b4 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-ssse3-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-wasmsimd-mul16.c
index eb7b3cf..ea1b223 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-wasmsimd-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-xop-mul32.c
index d1590b2..a91d509 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-xop-mul32.c
@@ -20,7 +20,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up1x25-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up1x25-minmax-gemmlowp-scalar.c
index 05aaa74..b831f10 100644
--- a/src/qs8-dwconv/gen/up1x25-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up1x25-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/scalar-utils.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up1x9-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up1x9-minmax-gemmlowp-scalar.c
index 29523f7..ec4ab28 100644
--- a/src/qs8-dwconv/gen/up1x9-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up1x9-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/scalar-utils.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul16.c
index 5040e72..bf88d5e 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul32.c
index f32c5d5..d113713 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
index 30157d0..8961bd8 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
@@ -501,7 +501,7 @@
         _mm256_add_epi32(_mm256_and_si256(vq31prodGHIJKLMN, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodGHIJKLMN));
 
       const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-neon-mul16.c
index ac87af0..53d2bf9 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-neon-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse2-mul16.c
index 8ba83d2..7023fc4 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul16.c
index d0069bb..340f5f3 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul32.c
index 3309700..04ebdc9 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-ssse3-mul16.c
index 499ab88..30e1f03 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-ssse3-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-wasmsimd-mul16.c
index e0c71db..64a9281 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-wasmsimd-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c
index fae92d2..f4ecccb 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c
@@ -20,7 +20,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul16.c
index 1582948..28eb3a3 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul32.c
index 32bb9dd..407af20 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
index 26221c8..b652b3d 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
@@ -229,7 +229,7 @@
         _mm256_add_epi32(_mm256_and_si256(vq31prodGHIJKLMN, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodGHIJKLMN));
 
       const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c
index 0de9001..d1844da 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse2-mul16.c
index c2a24ed..95d69d2 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul16.c
index b681da4..1905f55 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul32.c
index 65decb9..5efb25d 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-ssse3-mul16.c
index 46cd248..7dbe948 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-ssse3-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-wasmsimd-mul16.c
index f5efd68..edca769 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-wasmsimd-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-xop-mul32.c
index 9aa0bda..9c04013 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-xop-mul32.c
@@ -20,7 +20,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up2x25-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up2x25-minmax-gemmlowp-scalar.c
index aa314b5..1bcc61d 100644
--- a/src/qs8-dwconv/gen/up2x25-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up2x25-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/scalar-utils.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up2x9-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up2x9-minmax-gemmlowp-scalar.c
index 3e35cf3..5c67cf4 100644
--- a/src/qs8-dwconv/gen/up2x9-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up2x9-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/scalar-utils.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
index e22d41f..7ca30b5 100644
--- a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
+++ b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
index 268ac2a..8973c9d 100644
--- a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
@@ -585,7 +585,7 @@
         _mm256_add_epi32(_mm256_and_si256(vq31prodOPQRSTUV, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodOPQRSTUV));
 
       const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c
index 20d7dd2..72cf2f0 100644
--- a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c
+++ b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-neon-mul16.c
index 558a399..b5cf1f2 100644
--- a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-neon-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
index a7be837..bf634ea 100644
--- a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
+++ b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
index 1e2a93e..3eb639a 100644
--- a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
@@ -265,7 +265,7 @@
         _mm256_add_epi32(_mm256_and_si256(vq31prodOPQRSTUV, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodOPQRSTUV));
 
       const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx512skx-mul32.c b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx512skx-mul32.c
index d9e52a7..b321986 100644
--- a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx512skx-mul32.c
+++ b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx512skx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-neon-mul16.c
index e0e6458..5477ee3 100644
--- a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-neon-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up4x25-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up4x25-minmax-gemmlowp-scalar.c
index 33992d5..56f74fb 100644
--- a/src/qs8-dwconv/gen/up4x25-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up4x25-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/scalar-utils.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up4x9-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up4x9-minmax-gemmlowp-scalar.c
index 6cd245b..8980ff2 100644
--- a/src/qs8-dwconv/gen/up4x9-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up4x9-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/scalar-utils.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul16.c
index b23e417..232974b 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul32.c
index 889b7a8..491e608 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
index a298d77..29996a5 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c
index 71b69f2..21dd238 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse2-mul16.c
index 498844c..161dcd7 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul16.c
index 45dfe82..53f696c 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul32.c
index 3ccbf5a..049dc5b 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-ssse3-mul16.c
index 5704b3b..432bfa2 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-ssse3-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-wasmsimd-mul16.c
index 38345b4..9ed4d64 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-wasmsimd-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-xop-mul32.c
index a4467b8..34957ae 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-xop-mul32.c
@@ -20,7 +20,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul16.c
index 7f4f9e7..ee1e262 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul32.c
index 20ae162..2e938df 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
index fdebd15..3f11447 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c
index 1b13e62..51492bd 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse2-mul16.c
index d9bb1c1..78ed3c8 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse2-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul16.c
index 0a422aa..4cad6ea 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul32.c
index 4cb309e..2c1059c 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul32.c
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-ssse3-mul16.c
index b1f1651..23b45c8 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-ssse3-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-wasmsimd-mul16.c
index cd229d5..3832f21 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-wasmsimd-mul16.c
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-xop-mul32.c
index f3d8308..63b32dd 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-xop-mul32.c
@@ -20,7 +20,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/unipass-avx2-mul16.c.in b/src/qs8-dwconv/unipass-avx2-mul16.c.in
index f61650f..4bcbe25 100644
--- a/src/qs8-dwconv/unipass-avx2-mul16.c.in
+++ b/src/qs8-dwconv/unipass-avx2-mul16.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx2_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/unipass-avx2-mul32.c.in b/src/qs8-dwconv/unipass-avx2-mul32.c.in
index 8d808fc..072b747 100644
--- a/src/qs8-dwconv/unipass-avx2-mul32.c.in
+++ b/src/qs8-dwconv/unipass-avx2-mul32.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx2_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/unipass-avx512skx-mul32.c.in b/src/qs8-dwconv/unipass-avx512skx-mul32.c.in
index 8889966..903961e 100644
--- a/src/qs8-dwconv/unipass-avx512skx-mul32.c.in
+++ b/src/qs8-dwconv/unipass-avx512skx-mul32.c.in
@@ -15,7 +15,7 @@
 #include <xnnpack/intrinsics-polyfill.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx512skx_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx512skx_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/unipass-neon-mul16.c.in b/src/qs8-dwconv/unipass-neon-mul16.c.in
index b9d8862..54714f9 100644
--- a/src/qs8-dwconv/unipass-neon-mul16.c.in
+++ b/src/qs8-dwconv/unipass-neon-mul16.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__neon_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__neon_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/unipass-scalar.c.in b/src/qs8-dwconv/unipass-scalar.c.in
index 7cd25c6..cddbbc1 100644
--- a/src/qs8-dwconv/unipass-scalar.c.in
+++ b/src/qs8-dwconv/unipass-scalar.c.in
@@ -11,7 +11,7 @@
 #include <xnnpack/scalar-utils.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__scalar(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__scalar(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/unipass-sse-mul16.c.in b/src/qs8-dwconv/unipass-sse-mul16.c.in
index 5036dd7..a513992 100644
--- a/src/qs8-dwconv/unipass-sse-mul16.c.in
+++ b/src/qs8-dwconv/unipass-sse-mul16.c.in
@@ -17,7 +17,7 @@
 
 $PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
 $ISA = "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
-void xnn_qs8_dwconv_minmax_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__${ISA}_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__${ISA}_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/unipass-sse-mul32.c.in b/src/qs8-dwconv/unipass-sse-mul32.c.in
index 739501c..6e89d94 100644
--- a/src/qs8-dwconv/unipass-sse-mul32.c.in
+++ b/src/qs8-dwconv/unipass-sse-mul32.c.in
@@ -26,7 +26,7 @@
 
 
 $ISA = "xop" if XOP else "avx" if AVX else {4: "sse41"}[SSE]
-void xnn_qs8_dwconv_minmax_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__${ISA}_mul32(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__${ISA}_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-dwconv/unipass-wasmsimd-mul16.c.in b/src/qs8-dwconv/unipass-wasmsimd-mul16.c.in
index 7a74d3b..5031fd6 100644
--- a/src/qs8-dwconv/unipass-wasmsimd-mul16.c.in
+++ b/src/qs8-dwconv/unipass-wasmsimd-mul16.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__wasmsimd_mul16(
+void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__wasmsimd_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
diff --git a/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S b/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
index 69669ef..7146da6 100644
--- a/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
+++ b/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -25,7 +25,7 @@
 # C0  x6 v28 v29 v30 v31
 # unused v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32
 0:
         # Load initial bias from w into accumulators
         ADD     x2, x2, 3               // kc = (kc + 3) & ~3
@@ -111,7 +111,7 @@
 6:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S b/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
index 7c8b25b..73113db 100644
--- a/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
+++ b/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -25,7 +25,7 @@
 # C0  x6 v28 v29 v30 v31
 # unused v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64
         ADD     x2, x2, 3               // kc = (kc + 3) & ~3
         BIC     x2, x2, 3
 
@@ -145,7 +145,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in b/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
index 30baf88..2e8ce51 100644
--- a/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
+++ b/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -27,7 +27,7 @@
# x16, x17, x7 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -272,7 +272,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal.S.in b/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal.S.in
index df90eff..99f6cb2 100644
--- a/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal.S.in
+++ b/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -26,7 +26,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -247,7 +247,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-gemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index 40fe81b..9f8c28b 100644
--- a/src/qs8-gemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-gemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -29,7 +29,7 @@
 # temp1   v3 v11 v13 v15
 # unused  v8 v9
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -210,7 +210,7 @@
         LDP     d10, d11, [sp], 48
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in b/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
index 9f47636..f598124 100644
--- a/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
+++ b/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -30,7 +30,7 @@
# x16, x17, x20, x21 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -406,7 +406,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal.S.in b/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal.S.in
index 408db1a..398aee8 100644
--- a/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal.S.in
+++ b/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -29,7 +29,7 @@
 # temp1   v3 v11 v13 v15
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -353,7 +353,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S b/src/qs8-gemm/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S
index f4e75da..a5a1444 100644
--- a/src/qs8-gemm/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S
+++ b/src/qs8-gemm/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -29,7 +29,7 @@
 # temp1   v3 v11 v13 v15
 # unused  v8 v9
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -193,7 +193,7 @@
         LDP     d10, d11, [sp], 48
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in b/src/qs8-gemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
index ce92cbf..cb0facf 100644
--- a/src/qs8-gemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
+++ b/src/qs8-gemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -34,7 +34,7 @@
 
 # x10 x17 a53 temp registers
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -816,7 +816,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
index 5b3e213..811cd5c 100644
--- a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
+++ b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # C3  x7 v19 v23 v27 v31
 # unused v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -658,7 +658,7 @@
         LDP     d8,  d9, [sp], 32
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
index e92e6fe..169d7eb 100644
--- a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
+++ b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # C3  x7 v19 v23 v27 v31
 # unused v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -289,7 +289,7 @@
 6:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
index bf65995..efb6234 100644
--- a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
+++ b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # C3  x7 v19 v23 v27 v31
 # unused v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -340,7 +340,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/MRx16c8-avx512skx.c.in b/src/qs8-gemm/MRx16c8-avx512skx.c.in
index 7870b6c..12e073f 100644
--- a/src/qs8-gemm/MRx16c8-avx512skx.c.in
+++ b/src/qs8-gemm/MRx16c8-avx512skx.c.in
@@ -16,7 +16,7 @@
 
 
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x16c8__avx512skx(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRx4c2-sse.c.in b/src/qs8-gemm/MRx4c2-sse.c.in
index 7669bfa..e13c6f2 100644
--- a/src/qs8-gemm/MRx4c2-sse.c.in
+++ b/src/qs8-gemm/MRx4c2-sse.c.in
@@ -29,7 +29,7 @@
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
 $PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x4c2__${ISA}${LOAD_SUFFIX}(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c2__${ISA}${LOAD_SUFFIX}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRx4c8-sse.c.in b/src/qs8-gemm/MRx4c8-sse.c.in
index 0f2ed51..f06aed9 100644
--- a/src/qs8-gemm/MRx4c8-sse.c.in
+++ b/src/qs8-gemm/MRx4c8-sse.c.in
@@ -29,7 +29,7 @@
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
 $PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x4c8__${ISA}${LOAD_SUFFIX}(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c8__${ISA}${LOAD_SUFFIX}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRx4c8-wasmsimd.c.in b/src/qs8-gemm/MRx4c8-wasmsimd.c.in
index 4fced4a..46c1b72 100644
--- a/src/qs8-gemm/MRx4c8-wasmsimd.c.in
+++ b/src/qs8-gemm/MRx4c8-wasmsimd.c.in
@@ -15,7 +15,7 @@
 
 $LOAD_SUFFIX = {"LD128": "_ld128", "LD64": "_ld64", "EXTENDED": ""}[VARIANT]
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x4c8__wasmsimd${LOAD_SUFFIX}(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c8__wasmsimd${LOAD_SUFFIX}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRx8c8-avx2.c.in b/src/qs8-gemm/MRx8c8-avx2.c.in
index bac1ce2..20e4af5 100644
--- a/src/qs8-gemm/MRx8c8-avx2.c.in
+++ b/src/qs8-gemm/MRx8c8-avx2.c.in
@@ -15,7 +15,7 @@
 
 
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x8c8__avx2(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRxNRc4-neondot.c.in b/src/qs8-gemm/MRxNRc4-neondot.c.in
index b037bd8..dded8ae 100644
--- a/src/qs8-gemm/MRxNRc4-neondot.c.in
+++ b/src/qs8-gemm/MRxNRc4-neondot.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/c16-neon-mlal-padal.c.in b/src/qs8-gemm/c16-neon-mlal-padal.c.in
index 15f28b7..cb7ae46 100644
--- a/src/qs8-gemm/c16-neon-mlal-padal.c.in
+++ b/src/qs8-gemm/c16-neon-mlal-padal.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/c2-neon-mull-padal-dup.c.in b/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
index f9281fa..235d9e0 100644
--- a/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
+++ b/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c2__neon_${"mlal" if MLA else "mull"}_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}c2__neon_${"mlal" if MLA else "mull"}_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/c8-neon-mull-padal.c.in b/src/qs8-gemm/c8-neon-mull-padal.c.in
index 3a39bdf..3895a74 100644
--- a/src/qs8-gemm/c8-neon-mull-padal.c.in
+++ b/src/qs8-gemm/c8-neon-mull-padal.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c8__neon_${"mlal" if MLA else "mull"}_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}c8__neon_${"mlal" if MLA else "mull"}_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 884d3d9..cd26fa5 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
index 72f6a33..bb363e9 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 0d1e159..319a9d0 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 8777d3d..1735cc8 100644
--- a/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 7a60c35..262b710 100644
--- a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 6ab52c5..407cc9b 100644
--- a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
index f73dd33..9439336 100644
--- a/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
index 7729f91..08fff83 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
index a45769e..f2610c8 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
index ac60087..540f4ee 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
index e206a4b..4454ec1 100644
--- a/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x2__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
index 731650e..58ce276 100644
--- a/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
index 8350dc1..2601b09 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
index 63362ee..d3e3979 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
index 7df288a..d7f76e3 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
index d809fb1..d4703ba 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
index 6b57e90..19ea8d5 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
index 5a75904..e8112b9 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
index b3ea64b..cdcd535 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
index 1401dc4..e24e6cc 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
index 227d0ab..b92097b 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
index 1a3f597..1d4e12c 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
index 4c04278..e3e799e 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
index 299d609..1954255 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
index 975d67c..e3fff33 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
index 1b56035..ab14a17 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
index 5565473..9e37aeb 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
index 9f3ee44..7154798 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
index d537eeb..884a744 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
index 6370331..0a3c9bf 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
index 89adced..d824489 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
index a68701d..eec86d2 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
index 17a890e..e8f39f1 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
index 33104b1..ab3766d 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
index 535b406..b2f0dd1 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 358cb92..f5b73f0 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 7cd2050..96d437c 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
index 34e3cb0..a8f1eb7 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
index ebcc4aa..0a3b7b1 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
index 6d4827c..608678e 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
index c5da5d5..8304817 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
index c0ee9fd..cbf6c84 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
index 9967e0f..a1a6121 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
index 6983219..2984f88 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
index 44bc8cc..afa66cd 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 4a0861d..9279a2f 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
index a00f139..0039d94 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
index 2e98ffc..61a756b 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 41d2ce5..206e8a0 100644
--- a/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 02257a1..b830974 100644
--- a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index d5f3247..08c447a 100644
--- a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
index 7c6318d..88822ea 100644
--- a/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
index a7f167c..453eb25 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # x16, x17, x7 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -270,7 +270,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
index 3491d88..587c39c 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # x16, x17, x7 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -273,7 +273,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
index 120e13f..c876f1b 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -30,7 +30,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -245,7 +245,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index 5084b31..d835538 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -30,7 +30,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -239,7 +239,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
index 5f0a77c..44c0264 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
index ed094a7..b086df7 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
index 971d6a4..c97a2dc 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
index 69b6b7c..9146b41 100644
--- a/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 49580f8..9128db9 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
index b7d4071..d883ca9 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 5850903..cab6ad6 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
index eafacd8..6ddcd89 100644
--- a/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 20959ce..3b33cc3 100644
--- a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index a306ef6..8d2862d 100644
--- a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
index cab5fd0..c437aea 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
index de9c3ea..a766207 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
index 84a40e3..c96eaec 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
index 523cf90..9f4d9b4 100644
--- a/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x2__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
index 48188eb..01f2ab8 100644
--- a/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
index a6bb8cf..e9b0921 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
index 2b72822..d9fde6f 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
index c4ade3b..21bac85 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
index 672ce4a..f0f634f 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
index 60a7aa1..5b3d4ba 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
index 7f5ba39..07bedc7 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
index 53bf77c..be538ab 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
index c2aec7c..49afa05 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
index 6266b33..6ff8eaf 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
index a4f12a4..864db27 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
index dad6b55..5b70a39 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
index 4beb018..3391f82 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
index a1575e9..2a7e69e 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
index d51e490..4edb2ab 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
index 19dcda2..f202ba3 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
index fc48e65..ebe7be8 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
index 713a705..1224615 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
index 978e912..41ae006 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
index 8d7725e..eed4666 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
index 6236921..1efa46e 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
index d9db58d..67248e0 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
index 26c49c9..4edb5ad 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
index 163a9f8..40cc948 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index a48f0c7..e3dee05 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 2c37105..3996eaa 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
index 8f2ec5e..160f770 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
index 16bbdcc..f240a7e 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
index 3a1a964..db3d8c3 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
index 7764dd8..0f2e070 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
index 1903f6b..ff3ec8f 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
index bde1b65..771050c 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
index e9444f8..4b866f1 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
index b15a8f9..60defec 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 35fa5c9..3e70e46 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
index 296571f..c24f9af 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
index b5a7b2f..2938a65 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 21b3acc..34a5978 100644
--- a/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 530d297..3d4e374 100644
--- a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 83ab0d1..3ba6953 100644
--- a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
index a9e2aa4..3dba71d 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -34,7 +34,7 @@
 # x16, x17, x20, x21 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -402,7 +402,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
index 1470573..caa2e3b 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -34,7 +34,7 @@
 # x16, x17, x20, x21 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -406,7 +406,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
index 661ae54..e5be317 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -33,7 +33,7 @@
 # temp1   v3 v11 v13 v15
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -353,7 +353,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index f6df72a..6b0e996 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -33,7 +33,7 @@
 # temp1   v3 v11 v13 v15
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -349,7 +349,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
index be11da6..be9fc9a 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 19f7b55..9660125 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
index 41b37d7..d840aba 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
index f4e707b..732835b 100644
--- a/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 0fb5e2e..b16dcd3 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
index 9c498fc..4e234e0 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 84b77b5..e1b0ad0 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 5bd1d85..2a9e2cf 100644
--- a/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 782bb6b..259cc2e 100644
--- a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index f5b5951..9912589 100644
--- a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
index b6d6a6d..40903b2 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
index 9b457f2..03f31f0 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
index 77450dd..93aae3c 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
index 04253c0..461568a 100644
--- a/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x2__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
index 30a6d51..09eb67a 100644
--- a/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
index 06a9192..2010ae1 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
index cff57f6..31002b2 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
index a3e6c9f..f6b4b10 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
index a6a69b4..629aa5f 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
index 396470b..bd727a6 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
index 227d609..b379ade 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
index 306b6e5..762c63d 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
index 664fd04..09db9ec 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
index f599282..17c0ea1 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
index c7a4ceb..b05257b 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
index 0f5d9b1..29abd40 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
index 0b0c916..a54a94f 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
index c4b2953..b6f21c4 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
index 4cd3e0d..e44996f 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
index 86d4b3f..6687fef 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
index a2bf9d9..2b106c2 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
index 72b3074..96980f4 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
index 0f57e0f..2da3d50 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
index 10fa7cd..55cc4a7 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
index 6d8579c..4a255bc 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
index 62b0ba2..0ddf914 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
index 81d0de0..9a8dd50 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
index fa0121b..0857eaf 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 2c11faf..35d7f78 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 678e305..26f58fc 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
index 4a09afe..d840ac7 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
index 0c874fc..1bb8e6f 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
index a4192f6..5168a36 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
index 4d4d116..98bfb01 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
index a00a4ca..b9eda39 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
index 13681e3..c402de7 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
index d0df439..5fe2220 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
index e471b8e..2bf623f 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 94ddff9..ca03c7b 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
index 6e4a6b4..051e8f2 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
index b66556d..49a11d6 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
index e4ec320..6053eac 100644
--- a/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index f273682..851629c 100644
--- a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index c2510de..8001d85 100644
--- a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
index d41e047..e87cbef 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
index f7cdbe7..faa1eb7 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
index 0b7651e..18d17a1 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
index d589e3c..f7d5a09 100644
--- a/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
index 81c990c..0e84934 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -38,7 +38,7 @@
 
 # x10 x17 a53 temp registers
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -808,7 +808,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
index 4940ffe..4250a81 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -38,7 +38,7 @@
 
 # x10 x17 a53 temp registers
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -814,7 +814,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index c84ae2d..89c6da0 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
index 4dfd5af..6311f6b 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
index cd1d411..546e6c0 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 9831c12..88a8672 100644
--- a/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 1a21b08..045bcc0 100644
--- a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 8f6c1a2..1e2eb9a 100644
--- a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
index 2744fe9..d62c105 100644
--- a/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
index 2a7b6ca..d3cea1a 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
index 2f5cbb0..1768564 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
index 12294e8..f64cfb1 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
index 5d9c3ae..e2050cd 100644
--- a/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x2__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
index 725583d..35cf43c 100644
--- a/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
index d3c0a28..0f19279 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
index 43fdc24..83572e9 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
index 20a69e6..daff859 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
index aed4404..5ab288b 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
index 31a3cda..180fabe 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
index b2cf9d6..1bb000d 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
index c1a756e..94f18c0 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
index d1b058d..757c664 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
index b1b2b86..885d0f6 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
index 1bf5524..ff09bbe 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
index 8179aeb..0ff4f2d 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
index 60593d2..6fad423 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
index ca5a399..b59daa4 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
index a2cd7d0..a56c299 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
index 699237d..99ea366 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 2a00c63..427252d 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
index 6742b5c..04c1e2b 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
index 23b5f6f..abb9621 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 8fc445d..5245b23 100644
--- a/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index fa2daf0..d9f855b 100644
--- a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 879fdc7..8a81abc 100644
--- a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
index b609be0..b31b68e 100644
--- a/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 96764f3..bd2e83e 100644
--- a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
index 520ab4a..e37c3a1 100644
--- a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index f8d3267..8218751 100644
--- a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
index 00e6d40..db38867 100644
--- a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
index 1bd39bc..5dddcb2 100644
--- a/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 8a4308a..405d6d0 100644
--- a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
index 63d1f02..577417f 100644
--- a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
index 489e824..7466ac0 100644
--- a/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
index 88d722e..cd39d25 100644
--- a/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
index 47375be..fdbc6e3 100644
--- a/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/neon-mlal-lane.c.in b/src/qs8-gemm/neon-mlal-lane.c.in
index 0d9bf20..04e952c 100644
--- a/src/qs8-gemm/neon-mlal-lane.c.in
+++ b/src/qs8-gemm/neon-mlal-lane.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}__neon_mlal_lane${"_prfm" if PREFETCH else ""}(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}__neon_mlal_lane${"_prfm" if PREFETCH else ""}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/neon-mull-addw-dup.c.in b/src/qs8-gemm/neon-mull-addw-dup.c.in
index 3d38ef2..0508020 100644
--- a/src/qs8-gemm/neon-mull-addw-dup.c.in
+++ b/src/qs8-gemm/neon-mull-addw-dup.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/scalar.c.in b/src/qs8-gemm/scalar.c.in
index 314ff76..e94033f 100644
--- a/src/qs8-gemm/scalar.c.in
+++ b/src/qs8-gemm/scalar.c.in
@@ -10,7 +10,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
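(Illustrative note, not part of this commit: the hunks above and below all apply one mechanical rename, inserting the requantization scheme "gemmlowp" between the activation suffix ("minmax", optionally preceded by "xw_") and "ukernel" in every QS8 GEMM/IGEMM/DWCONV symbol, including the templated names in the .c.in generators. A minimal Python sketch of that mapping, assuming the hypothetical helper name rename_symbol and not reflecting the actual tooling used to produce the diff:

    import re

    # Insert "gemmlowp" after the (xw_)minmax suffix of a QS8 microkernel symbol.
    _RENAME = re.compile(r"\b(xnn_qs8_(?:gemm|igemm|dwconv)(?:_xw)?_minmax)_ukernel_")

    def rename_symbol(symbol: str) -> str:
        """Map an old QS8 microkernel symbol to its requantization-qualified name."""
        return _RENAME.sub(r"\1_gemmlowp_ukernel_", symbol)

    assert rename_symbol("xnn_qs8_gemm_minmax_ukernel_4x4__scalar") == \
        "xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar"
    assert rename_symbol("xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx") == \
        "xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx"

The same substitution covers the template placeholders such as ${MR}x${NR} and the ${GEMM_SUFFIX} "_xw" variants, since only the fixed "_minmax_ukernel_" portion of the name changes.)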
diff --git a/src/qs8-igemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in b/src/qs8-igemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
index 98df387..9d96ed8 100644
--- a/src/qs8-igemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
+++ b/src/qs8-igemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -29,7 +29,7 @@
 # x16, x17, x7 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -292,7 +292,7 @@
 9:
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/1x8c8-aarch64-neon-mlal-padal.S.in b/src/qs8-igemm/1x8c8-aarch64-neon-mlal-padal.S.in
index a61da97..c105196 100644
--- a/src/qs8-igemm/1x8c8-aarch64-neon-mlal-padal.S.in
+++ b/src/qs8-igemm/1x8c8-aarch64-neon-mlal-padal.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -28,7 +28,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -268,7 +268,7 @@
 9:
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-igemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index bc28ebe..f1a64a5 100644
--- a/src/qs8-igemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-igemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # temp1   v3 v11 v13 v15
 # unused  v8 v9
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -231,7 +231,7 @@
         LDP     d10, d11, [sp], 48
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in b/src/qs8-igemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
index 862d901..9a0b8db 100644
--- a/src/qs8-igemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
+++ b/src/qs8-igemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -32,7 +32,7 @@
 # x16, x17, x20, x21 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -430,7 +430,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/2x8c8-aarch64-neon-mlal-padal.S.in b/src/qs8-igemm/2x8c8-aarch64-neon-mlal-padal.S.in
index dc9c076..4d59138 100644
--- a/src/qs8-igemm/2x8c8-aarch64-neon-mlal-padal.S.in
+++ b/src/qs8-igemm/2x8c8-aarch64-neon-mlal-padal.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -30,7 +30,7 @@
 # temp0   v2 v10 v12 v14
 # temp1   v3 v11 v13 v15
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -376,7 +376,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in b/src/qs8-igemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
index c4b183e..03b2a58 100644
--- a/src/qs8-igemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
+++ b/src/qs8-igemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -35,7 +35,7 @@
 # unused  v8 v9 v10 v11 v12 v13 v14 v15
 # x8, x21 temp for Cortex-A53 loads
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
 
         # Clamp C pointers
         CMP     x0, 2                   // if mr < 2
@@ -844,7 +844,7 @@
         LDP     x20, x21, [sp], 16
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S b/src/qs8-igemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
index 0774357..4a01759 100644
--- a/src/qs8-igemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
+++ b/src/qs8-igemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -35,7 +35,7 @@
 
 # x8 temp for Cortex-A55 loads
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
 
         # Clamp C pointers
         CMP     x0, 2                   // if mr < 2
@@ -692,7 +692,7 @@
         LDR     x20, [sp], 48
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S b/src/qs8-igemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
index ca6f97b..38b6575 100644
--- a/src/qs8-igemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
+++ b/src/qs8-igemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -33,7 +33,7 @@
 # C3   x7 v19 v23 v27 v31
 # unused v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
 
         # Clamp C pointers
         CMP     x0, 2                   // if mr < 2
@@ -368,7 +368,7 @@
 9:
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/MRx16c8-avx512skx.c.in b/src/qs8-igemm/MRx16c8-avx512skx.c.in
index 4eddff2..44e15c5 100644
--- a/src/qs8-igemm/MRx16c8-avx512skx.c.in
+++ b/src/qs8-igemm/MRx16c8-avx512skx.c.in
@@ -16,7 +16,7 @@
 
 
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_igemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x16c8__avx512skx(
+void xnn_qs8_igemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/MRx4c2-sse.c.in b/src/qs8-igemm/MRx4c2-sse.c.in
index ded0cde..fd1fa0d 100644
--- a/src/qs8-igemm/MRx4c2-sse.c.in
+++ b/src/qs8-igemm/MRx4c2-sse.c.in
@@ -27,7 +27,7 @@
 
 $PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
-void xnn_qs8_igemm_minmax_ukernel_${MR}x4c2__${ISA}_${VARIANT.lower()}(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x4c2__${ISA}_${VARIANT.lower()}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/MRx4c8-sse.c.in b/src/qs8-igemm/MRx4c8-sse.c.in
index 2357c61..4a53a24 100644
--- a/src/qs8-igemm/MRx4c8-sse.c.in
+++ b/src/qs8-igemm/MRx4c8-sse.c.in
@@ -27,7 +27,7 @@
 
 $PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
-void xnn_qs8_igemm_minmax_ukernel_${MR}x4c8__${ISA}_${VARIANT.lower()}(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x4c8__${ISA}_${VARIANT.lower()}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/MRx4c8-wasmsimd.c.in b/src/qs8-igemm/MRx4c8-wasmsimd.c.in
index 22aec9c..8703f0d 100644
--- a/src/qs8-igemm/MRx4c8-wasmsimd.c.in
+++ b/src/qs8-igemm/MRx4c8-wasmsimd.c.in
@@ -15,7 +15,7 @@
 
 $LOAD_SUFFIX = {"LD128": "_ld128", "LD64": "_ld64", "EXTENDED": ""}[VARIANT]
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_igemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x4c8__wasmsimd${LOAD_SUFFIX}(
+void xnn_qs8_igemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c8__wasmsimd${LOAD_SUFFIX}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/MRx8c8-avx2.c.in b/src/qs8-igemm/MRx8c8-avx2.c.in
index 34df8bb..99f1cd3 100644
--- a/src/qs8-igemm/MRx8c8-avx2.c.in
+++ b/src/qs8-igemm/MRx8c8-avx2.c.in
@@ -13,7 +13,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_${MR}x8c8__avx2(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/MRxNRc4-neondot.c.in b/src/qs8-igemm/MRxNRc4-neondot.c.in
index 65765ca..f0b49b0 100644
--- a/src/qs8-igemm/MRxNRc4-neondot.c.in
+++ b/src/qs8-igemm/MRxNRc4-neondot.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_${MR}x${NR}c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x${NR}c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/c16-neon-mlal-padal.c.in b/src/qs8-igemm/c16-neon-mlal-padal.c.in
index e388b60..d02f81e 100644
--- a/src/qs8-igemm/c16-neon-mlal-padal.c.in
+++ b/src/qs8-igemm/c16-neon-mlal-padal.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_${MR}x${NR}c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x${NR}c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/c2-neon-mull-padal-dup.c.in b/src/qs8-igemm/c2-neon-mull-padal-dup.c.in
index 47ded3c..3a4b914 100644
--- a/src/qs8-igemm/c2-neon-mull-padal-dup.c.in
+++ b/src/qs8-igemm/c2-neon-mull-padal-dup.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_${MR}x${NR}c2__neon_${"mlal" if MLA else "mull"}_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x${NR}c2__neon_${"mlal" if MLA else "mull"}_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/c8-neon-mull-padal.c.in b/src/qs8-igemm/c8-neon-mull-padal.c.in
index 5ee7068..196a70e 100644
--- a/src/qs8-igemm/c8-neon-mull-padal.c.in
+++ b/src/qs8-igemm/c8-neon-mull-padal.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_${MR}x${NR}c8__neon_${"mlal" if MLA else "mull"}_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x${NR}c8__neon_${"mlal" if MLA else "mull"}_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index db4b2c2..01682b2 100644
--- a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
index 31abb20..32d6b00 100644
--- a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 80eb6bf..17e0868 100644
--- a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 78c2c1b..f8151f5 100644
--- a/src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 3302353..6a0ffb0 100644
--- a/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index d0f3a9a..2e29d63 100644
--- a/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/1x16c4-minmax-gemmlowp-neondot.c
index 562b466..a6a0478 100644
--- a/src/qs8-igemm/gen/1x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/1x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
index fd29a93..4e5a727 100644
--- a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
index f55054d..b2e62e0 100644
--- a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
index 266213a..ad3ce4b 100644
--- a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x2-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/1x2-minmax-gemmlowp-scalar.c
index 5348c86..ec74009 100644
--- a/src/qs8-igemm/gen/1x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/1x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x2__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/1x4-minmax-gemmlowp-scalar.c
index d25d7f3..44afd15 100644
--- a/src/qs8-igemm/gen/1x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/1x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
index f309d27..cc0fdfd 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
index 852be0e..4f2b03a 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
index 74c720c..2f5d65c 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
index 12f9ea0..cd7e672 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
index f3a9c3a..bb86be8 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
index 4e9e5f9..6660cff 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
index 4380fab..952c34d 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
index 655c345..6689a43 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
index 95a2775..98462b9 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
index 841e214..7db8c4a 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
index 58d01b6..0139649 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
index d2dd638..5e41b07 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
index 1793705..558942f 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
index 5d1fdae..abfed40 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
index eaffaa6..35c7f15 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
index ad2344f..86ea5bc 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
index da9a1c1..7dce8bf 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
index 520ba62..f70dfe7 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 22c44b8..698b1c2 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 3462440..05a1aed 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
index b0646d5..e1d97fc 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
index 7403cf2..566417e 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 5402416..8ece13e 100644
--- a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
index c015e50..78eb749 100644
--- a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
index bcf3557..b61af38 100644
--- a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 8138af7..706e229 100644
--- a/src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 0cdfb22..c3cd147 100644
--- a/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index c4e7719..12abba6 100644
--- a/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/1x8c4-minmax-gemmlowp-neondot.c
index 462f5f8..3c36334 100644
--- a/src/qs8-igemm/gen/1x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/1x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
index 191f29c..22f6dce 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -33,7 +33,7 @@
 # x16, x17, x7 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -290,7 +290,7 @@
 9:
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
index 60e5690..7432bc9 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -33,7 +33,7 @@
 # x16, x17, x7 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -293,7 +293,7 @@
 9:
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
index 91b25b2..b8b8fdc 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -32,7 +32,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -266,7 +266,7 @@
 9:
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index 6d9f584..177d0a3 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -32,7 +32,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -260,7 +260,7 @@
 9:
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
index b988ae6..6425aab 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
index d9d019c..13106af 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
index 4ba8e9d..1005f20 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 5e44b61..d016b77 100644
--- a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
index 5e43c14..141a90f 100644
--- a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
index b4ca64f..36633c2 100644
--- a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 1e09bef..87ac69d 100644
--- a/src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 9a43b7f..747974b 100644
--- a/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 8b1aa64..378be07 100644
--- a/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
index 1a3b682..7df5372 100644
--- a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
index cb9def0..9f81ab2 100644
--- a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
index f38918b..7e67595 100644
--- a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x2-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/2x2-minmax-gemmlowp-scalar.c
index 3df4ea9..d00969c 100644
--- a/src/qs8-igemm/gen/2x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/2x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x2__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/2x4-minmax-gemmlowp-scalar.c
index 41f6d3b..604724d 100644
--- a/src/qs8-igemm/gen/2x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/2x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
index cc08b92..21c2ab0 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
index 21823a3..8c732d0 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
index 89f3698..f44e287 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
index d59bf28..f9c23a9 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
index b852014..ccc6837 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
index 4532c35..6574638 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
index 10fc1e4..3e938c5 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
index 175955b..2c7bb65 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
index 71b9cab..bc8de63 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
index 88649df..d7ba893 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
index 9307dc8..d5e1ccc 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
index b92f417..b0da08e 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
index 9304588..68a37f8 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
index c6773a8..ab3cb2a 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
index 0c1f2ab..d347a52 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
index 96af574..7297ec2 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
index 95a3f59..8595f87 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
index ec11e48..72c693b 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 48c61b0..eef58f3 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index fd14cfa..0a27efb 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
index bd3c5a7..7aca948 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
index bb0a7c2..3c84d46 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index e00f237..5bf0385 100644
--- a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
index 17a491a..f85fbae 100644
--- a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
index c1e4562..263868b 100644
--- a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 3066e60..6c951af 100644
--- a/src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 9545632..082533b 100644
--- a/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index a62dc26..1c9c0f5 100644
--- a/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
index badd8a2..dd54ced 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -36,7 +36,7 @@
 # x16, x17, x20, x21 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -426,7 +426,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
index fe63272..819b9ac 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -36,7 +36,7 @@
 # x16, x17, x20, x21 temporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -430,7 +430,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
index b38fa1e..8c28b34 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -34,7 +34,7 @@
 # temp0   v2 v10 v12 v14
 # temp1   v3 v11 v13 v15
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -376,7 +376,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index ce266a3..e2a0b28 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -34,7 +34,7 @@
 # temp0   v2 v10 v12 v14
 # temp1   v3 v11 v13 v15
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
 
         # Clamp C pointers
         LDP     x10, x11, [sp]          // Load cn_stride, a_offset
@@ -372,7 +372,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
index b33bfa9..caca667 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 1f2bf2d..a3af826 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
index 5f109d6..31bda68 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index f6886b6..f61a9de 100644
--- a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
index 68c61de..0a0cb9e 100644
--- a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
index ddd6d7d..afc1431 100644
--- a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
index e382f8b..9662891 100644
--- a/src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 4bb026f..eb37a3d 100644
--- a/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 6d14058..f176861 100644
--- a/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
index 73d32f0..c4d719b 100644
--- a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
index fe9e909..eebeef7 100644
--- a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
index d183585..be2f12e 100644
--- a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x2-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/3x2-minmax-gemmlowp-scalar.c
index 26376a3..c7e5fff 100644
--- a/src/qs8-igemm/gen/3x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/3x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x2__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/3x4-minmax-gemmlowp-scalar.c
index 37be2a5..5a5d479 100644
--- a/src/qs8-igemm/gen/3x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/3x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
index 07170b4..32e68f2 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
index a6224db..d264f88 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
index 991e596..9dcad31 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
index 1dff2c6..ecf380a 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
index e9ac047..a615fc1 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
index cf64636..f3b352b 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
index fedb5a0..e825bef 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
index 0a105fb..86c9cbe 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
index 6fd187b..9e4a7d8 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
index c0a46f4..7414a94 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
index 7d5fff1..36af6c3 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
index 330eaa0..cb70d63 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
index 0022e00..284079b 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
index 83d4334..5a6247f 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
index a39c3ab..79a11eb 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
index 82eec9e..ee3e7cc 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
index 0a35711..7373193 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
index 27e040c..d6dfef6 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 1ef1508..f75f48e 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 822c65a..85b4a7d 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
index 96d42c0..53f2195 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
index f311e3c..09fde8b 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index c00d092..e848075 100644
--- a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
index 311e806..01c7412 100644
--- a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
index 5091523..7f2be2d 100644
--- a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 4341423..a5b0286 100644
--- a/src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 69ab549..a4ce62b 100644
--- a/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 4734ce3..d551100 100644
--- a/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
index 2055125..63bcf90 100644
--- a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 35581ca..31e55a9 100644
--- a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
index fd579d7..8780486 100644
--- a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
index e84ace2..e29d581 100644
--- a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
+++ b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -39,7 +39,7 @@
 # unused  v8 v9 v10 v11 v12 v13 v14 v15
 # x8, x21 temp for Cortex-A53 loads
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
 
         # Clamp C pointers
         CMP     x0, 2                   // if mr < 2
@@ -836,7 +836,7 @@
         LDP     x20, x21, [sp], 16
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
index ce91ed5..a8dd985 100644
--- a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
+++ b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(
+# void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -39,7 +39,7 @@
 # unused  v8 v9 v10 v11 v12 v13 v14 v15
 # x8, x21 temp for Cortex-A53 loads
 
-BEGIN_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
 
         # Clamp C pointers
         CMP     x0, 2                   // if mr < 2
@@ -842,7 +842,7 @@
         LDP     x20, x21, [sp], 16
         RET
 
-END_FUNCTION xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
+END_FUNCTION xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 0169f60..a86d671 100644
--- a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
index 3a48d1e..ef22d36 100644
--- a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 304b2df..05aef6c 100644
--- a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
index c8b084f..fe0baa2 100644
--- a/src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 17d2623..8429af6 100644
--- a/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index d402f2f..8176e11 100644
--- a/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-neondot.c
index 66f1317..dde4046 100644
--- a/src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
index 2752f79..4630194 100644
--- a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
index b97ecf7..dace7e7 100644
--- a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
index 33a9ff1..3aa103b 100644
--- a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x2-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/4x2-minmax-gemmlowp-scalar.c
index 4c2bdae..934799b 100644
--- a/src/qs8-igemm/gen/4x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/4x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x2__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/4x4-minmax-gemmlowp-scalar.c
index b22b281..f7be8f5 100644
--- a/src/qs8-igemm/gen/4x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/4x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
index e341603..137cf4e 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
index 2d815c9..4d01d59 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
index 3e86a97..ca39bac 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
index bdb9caa..66ef038 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
index 09e76c0..e2381ae 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
index 66c499c..6b06911 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
index d8559fe..e18f6ea 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
index 2318f10..f4c7be1 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
index 0ce5964..fea4ce0 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
index b44fa57..b971803 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index aa8ea4c..ccada47 100644
--- a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
index 75cf5d3..fbbc02e 100644
--- a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
index abe1d12..06556f9 100644
--- a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 807df44..2207ceb 100644
--- a/src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index fe50d25..ca84aba 100644
--- a/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index de6c5cb..9e5542d 100644
--- a/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/4x8c4-minmax-gemmlowp-neondot.c
index 8f4fae8..d3d7681 100644
--- a/src/qs8-igemm/gen/4x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/4x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 7328bbe..ada160f 100644
--- a/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
index 26179ea..11e1792 100644
--- a/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 2529389..3bb2417 100644
--- a/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
index 660b7aa..5af8751 100644
--- a/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/6x16c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/6x16c4-minmax-gemmlowp-neondot.c
index c887d8b..06ee29d 100644
--- a/src/qs8-igemm/gen/6x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/6x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 6418730..e3a6be1 100644
--- a/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
index 5e5e698..9231e07 100644
--- a/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/6x8c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/6x8c4-minmax-gemmlowp-neondot.c
index ba44c4e..247fb20 100644
--- a/src/qs8-igemm/gen/6x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/6x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/8x16c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/8x16c4-minmax-gemmlowp-neondot.c
index 3083482..191c27f 100644
--- a/src/qs8-igemm/gen/8x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/8x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/gen/8x8c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/8x8c4-minmax-gemmlowp-neondot.c
index a00dd50..420a821 100644
--- a/src/qs8-igemm/gen/8x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/8x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/neon-mlal-lane.c.in b/src/qs8-igemm/neon-mlal-lane.c.in
index 214c431..5fcebaf 100644
--- a/src/qs8-igemm/neon-mlal-lane.c.in
+++ b/src/qs8-igemm/neon-mlal-lane.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/igemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_${MR}x${NR}__neon_mlal_lane${"_prfm" if PREFETCH else ""}(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x${NR}__neon_mlal_lane${"_prfm" if PREFETCH else ""}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/neon-mull-addw-dup.c.in b/src/qs8-igemm/neon-mull-addw-dup.c.in
index 45f3ff4..f229b60 100644
--- a/src/qs8-igemm/neon-mull-addw-dup.c.in
+++ b/src/qs8-igemm/neon-mull-addw-dup.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_${MR}x${NR}__neon_mull_addw_dup(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x${NR}__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-igemm/scalar.c.in b/src/qs8-igemm/scalar.c.in
index dd29263..71ad765 100644
--- a/src/qs8-igemm/scalar.c.in
+++ b/src/qs8-igemm/scalar.c.in
@@ -10,7 +10,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_igemm_minmax_ukernel_${MR}x${NR}__scalar(
+void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x${NR}__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
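
Note on the three .c.in templates above: they are where the generated IGEMM symbol names come from, and the rename simply places the requantization scheme ("gemmlowp") between "minmax" and "ukernel" in the template string. As a minimal illustration only — this is not the repository's actual code generator, and the helper name qs8_igemm_symbol is hypothetical — the following Python mirrors how those placeholders expand into the renamed symbols.

def qs8_igemm_symbol(mr, nr, arch, prefetch=False):
    # The requantization scheme ("gemmlowp") now sits between "minmax" and "ukernel";
    # MR/NR and the optional "_prfm" suffix come from the template parameters above.
    suffix = "_prfm" if prefetch else ""
    return f"xnn_qs8_igemm_minmax_gemmlowp_ukernel_{mr}x{nr}__{arch}{suffix}"

# Reproduces one of the symbols renamed in this patch:
assert qs8_igemm_symbol(4, 16, "neon_mlal_lane", prefetch=True) == \
    "xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm"
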
diff --git a/src/xnnpack/dwconv.h b/src/xnnpack/dwconv.h
index 7d90e98..b478e31 100644
--- a/src/xnnpack/dwconv.h
+++ b/src/xnnpack/dwconv.h
@@ -290,109 +290,109 @@
     const int8_t* zero,                                             \
     const union xnn_qs8_gemm_params* params);
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16)
 
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar)
-DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar)
 
 
 #define DECLARE_F32_DWCONV2D_CHW_MINMAX_UKERNEL_FUNCTION(fn_name) \
diff --git a/src/xnnpack/gemm.h b/src/xnnpack/gemm.h
index cd81081..399c98d 100644
--- a/src/xnnpack/gemm.h
+++ b/src/xnnpack/gemm.h
@@ -518,236 +518,236 @@
       size_t cn_stride,                                   \
       const union xnn_qs8_gemm_params* params);
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x2__scalar)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x2__scalar)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x2__scalar)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x2__scalar)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar)
 
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_1x4__scalar)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_2x4__scalar)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_3x4__scalar)
-DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_ukernel_4x4__scalar)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar)
 
 
 #define DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(fn_name) \
@@ -763,58 +763,58 @@
       size_t cn_stride,                                      \
       const union xnn_qs8_gemm_params* params);
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__sse2)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__sse2)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__ssse3)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__ssse3)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__ssse3)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__ssse3)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__sse41)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__sse41)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse41)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse41)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__avx)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__avx)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__avx)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__avx)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__xop)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__xop)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__xop)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__xop)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2)
 
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd)
-DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd)
+DECLARE_QS8_GEMM_XW_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd)
 
 
 #ifdef __cplusplus
diff --git a/src/xnnpack/igemm.h b/src/xnnpack/igemm.h
index 08350e3..a7e34bd 100644
--- a/src/xnnpack/igemm.h
+++ b/src/xnnpack/igemm.h
@@ -327,230 +327,230 @@
       const int8_t* zero,                                  \
       const union xnn_qs8_gemm_params* params);
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x2__scalar)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x2__scalar)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x2__scalar)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x2__scalar)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar)
 
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_1x4__scalar)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_2x4__scalar)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_3x4__scalar)
-DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_ukernel_4x4__scalar)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar)
 
 
 #ifdef __cplusplus
diff --git a/test/qs8-dwconv-minmax-gemmlowp.cc b/test/qs8-dwconv-minmax-gemmlowp.cc
new file mode 100644
index 0000000..43e5a75
--- /dev/null
+++ b/test/qs8-dwconv-minmax-gemmlowp.cc
@@ -0,0 +1,14247 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+// All rights reserved.
+//
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+//
+// Auto-generated file. Do not edit!
+//   Specification: test/qs8-dwconv-minmax-gemmlowp.yaml
+//   Generator: tools/generate-dwconv-test.py
+
+
+#include <gtest/gtest.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
+#include <xnnpack/dwconv.h>
+#include "dwconv-microkernel-tester.h"
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_eq_8) {
+    TEST_REQUIRES_ARM_NEON;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_div_8) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_lt_8) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_gt_8) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, multipixel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_eq_16) {
+    TEST_REQUIRES_ARM_NEON;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_div_16) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_lt_16) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_gt_16) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, multipixel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_eq_24) {
+    TEST_REQUIRES_ARM_NEON;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_div_24) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_lt_24) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_gt_24) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, multipixel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_eq_32) {
+    TEST_REQUIRES_ARM_NEON;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(9)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_div_32) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_lt_32) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_gt_32) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, multipixel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_eq_8) {
+    TEST_REQUIRES_X86_SSE2;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_div_8) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_lt_8) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_gt_8) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_SSE2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_eq_24) {
+    TEST_REQUIRES_X86_SSE2;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_div_24) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_lt_24) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_gt_24) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_eq_8) {
+    TEST_REQUIRES_X86_SSSE3;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_div_8) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_lt_8) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_gt_8) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, zero) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_SSSE3;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, zero) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_eq_24) {
+    TEST_REQUIRES_X86_SSSE3;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_div_24) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_lt_24) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_gt_24) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, zero) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_eq_8) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_div_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_lt_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_gt_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_eq_24) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_div_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_lt_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_gt_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_eq_8) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_div_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_lt_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_gt_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_eq_24) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_div_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_lt_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_gt_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_eq_32) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(9)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_div_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_lt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_gt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_XOP;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, multipixel) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, input_offset) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, zero) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_XOP;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, multipixel) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, input_offset) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, zero) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_XOP;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, multipixel) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, input_offset) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, zero) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_eq_32) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(9)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_div_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_lt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_gt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_eq_32) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(9)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_div_32) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_lt_32) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_gt_32) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_eq_8) {
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_div_8) {
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_lt_8) {
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_gt_8) {
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, multipixel) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, input_offset) {
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, zero) {
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_eq_16) {
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_div_16) {
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_lt_16) {
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_gt_16) {
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, multipixel) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, input_offset) {
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, zero) {
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_eq_24) {
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_div_24) {
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_lt_24) {
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_gt_24) {
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, multipixel) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, input_offset) {
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, zero) {
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(9)
+    .channels(1)
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(9)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(9)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(9)
+    .channels(2)
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(9)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(9)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(9)
+    .channels(4)
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 9; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(9)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 9; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(9)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_eq_8) {
+    TEST_REQUIRES_ARM_NEON;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_div_8) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_lt_8) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_gt_8) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, multipixel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_eq_16) {
+    TEST_REQUIRES_ARM_NEON;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_div_16) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_lt_16) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_gt_16) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, multipixel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_eq_24) {
+    TEST_REQUIRES_ARM_NEON;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_div_24) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_lt_24) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_gt_24) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, multipixel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_ARM || XNN_ARCH_ARM64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_eq_32) {
+    TEST_REQUIRES_ARM_NEON;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(25)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_div_32) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_lt_32) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_gt_32) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, multipixel) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_ARM_NEON;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, input_offset) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, zero) {
+    TEST_REQUIRES_ARM_NEON;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_eq_8) {
+    TEST_REQUIRES_X86_SSE2;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_div_8) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_lt_8) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_gt_8) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_SSE2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_eq_24) {
+    TEST_REQUIRES_X86_SSE2;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_div_24) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_lt_24) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_gt_24) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_eq_8) {
+    TEST_REQUIRES_X86_SSSE3;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_div_8) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_lt_8) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_gt_8) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, zero) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_SSSE3;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, zero) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_eq_24) {
+    TEST_REQUIRES_X86_SSSE3;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_div_24) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_lt_24) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_gt_24) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, zero) {
+    TEST_REQUIRES_X86_SSSE3;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_eq_8) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_div_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_lt_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_gt_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_eq_24) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_div_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_lt_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_gt_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_eq_8) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_div_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_lt_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_gt_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_eq_24) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_div_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_lt_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_gt_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_eq_32) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(25)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_div_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_lt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_gt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_SSE41;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, multipixel) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_SSE41;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, input_offset) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, zero) {
+    TEST_REQUIRES_X86_SSE41;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_AVX;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_XOP;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, multipixel) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, input_offset) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, zero) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_XOP;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, multipixel) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, input_offset) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, zero) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_XOP;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, multipixel) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_XOP;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, input_offset) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, zero) {
+    TEST_REQUIRES_X86_XOP;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_eq_32) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(25)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_div_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_lt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_gt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_eq_32) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(25)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_div_32) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_lt_32) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_gt_32) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX512SKX;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_eq_8) {
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_div_8) {
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_lt_8) {
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_gt_8) {
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, multipixel) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, input_offset) {
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, zero) {
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_eq_16) {
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_div_16) {
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_lt_16) {
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_gt_16) {
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, multipixel) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, input_offset) {
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, zero) {
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+#if XNN_ARCH_WASMSIMD
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_eq_24) {
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_div_24) {
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_lt_24) {
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_gt_24) {
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, multipixel) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, input_offset) {
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, zero) {
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
+      }
+    }
+  }
+#endif  // XNN_ARCH_WASMSIMD
+
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, c_eq_1) {
+  DWConvMicrokernelTester()
+    .cr(1)
+    .kr(25)
+    .channels(1)
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, c_gt_1) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, c_gt_1_with_qmin) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, c_gt_1_with_qmax) {
+  for (uint32_t channels = 2; channels < 10; channels++) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, multipixel) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(1)
+      .width(5)
+      .output_stride(7)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 5; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, input_offset) {
+  for (uint32_t channels = 2; channels < 16; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(1)
+      .kr(25)
+      .channels(channels)
+      .input_offset(48)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 2; channels < 16; channels += 3) {
+      DWConvMicrokernelTester()
+        .cr(1)
+        .kr(25)
+        .channels(channels)
+        .input_offset(48)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_eq_2) {
+  DWConvMicrokernelTester()
+    .cr(2)
+    .kr(25)
+    .channels(2)
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_div_2) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_div_2_with_qmin) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_div_2_with_qmax) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_lt_2) {
+  for (uint32_t channels = 1; channels < 2; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_gt_2) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_gt_2_with_qmin) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_gt_2_with_qmax) {
+  for (uint32_t channels = 3; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, multipixel) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(2)
+      .width(5)
+      .output_stride(13)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 10; channels += 1) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, input_offset) {
+  for (uint32_t channels = 4; channels < 32; channels += 6) {
+    DWConvMicrokernelTester()
+      .cr(2)
+      .kr(25)
+      .channels(channels)
+      .input_offset(80)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 4; channels < 32; channels += 6) {
+      DWConvMicrokernelTester()
+        .cr(2)
+        .kr(25)
+        .channels(channels)
+        .input_offset(80)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_eq_4) {
+  DWConvMicrokernelTester()
+    .cr(4)
+    .kr(25)
+    .channels(4)
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_div_4) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_div_4_with_qmin) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_div_4_with_qmax) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_lt_4) {
+  for (uint32_t channels = 1; channels < 4; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_gt_4) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_gt_4_with_qmin) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_gt_4_with_qmax) {
+  for (uint32_t channels = 5; channels < 8; channels++) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, multipixel) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, multipixel_with_step) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    for (size_t step = 2; step <= 25; step++) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .step(step)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, multipixel_with_output_stride) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(4)
+      .width(5)
+      .output_stride(23)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, multipixel_with_qmin) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmin(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, multipixel_with_qmax) {
+  for (size_t channels = 1; channels <= 20; channels += 3) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .width(3)
+      .qmax(128)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, input_offset) {
+  for (uint32_t channels = 8; channels < 64; channels += 12) {
+    DWConvMicrokernelTester()
+      .cr(4)
+      .kr(25)
+      .channels(channels)
+      .input_offset(112)
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+  }
+}
+
+TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, zero) {
+  for (uint32_t mz = 0; mz < 25; mz++) {
+    for (uint32_t channels = 8; channels < 64; channels += 12) {
+      DWConvMicrokernelTester()
+        .cr(4)
+        .kr(25)
+        .channels(channels)
+        .input_offset(112)
+        .zero_index(mz)
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
+    }
+  }
+}
\ No newline at end of file
diff --git a/test/qs8-dwconv-minmax-gemmlowp.yaml b/test/qs8-dwconv-minmax-gemmlowp.yaml
new file mode 100644
index 0000000..3ad7102
--- /dev/null
+++ b/test/qs8-dwconv-minmax-gemmlowp.yaml
@@ -0,0 +1,160 @@
+# Copyright 2020 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16
+  init: xnn_init_qs8_gemm_neon_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16
+  init: xnn_init_qs8_gemm_neon_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16
+  init: xnn_init_qs8_gemm_neon_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16
+  init: xnn_init_qs8_gemm_neon_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16
+  init: xnn_init_qs8_gemm_wasmsimd_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16
+  init: xnn_init_qs8_gemm_wasmsimd_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16
+  init: xnn_init_qs8_gemm_wasmsimd_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16
+  init: xnn_init_qs8_gemm_neon_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16
+  init: xnn_init_qs8_gemm_neon_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16
+  init: xnn_init_qs8_gemm_neon_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16
+  init: xnn_init_qs8_gemm_neon_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32
+  init: xnn_init_qs8_gemm_sse4_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32
+  init: xnn_init_qs8_gemm_avx2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32
+  init: xnn_init_qs8_gemm_sse2_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16
+  init: xnn_init_qs8_gemm_wasmsimd_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16
+  init: xnn_init_qs8_gemm_wasmsimd_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16
+  init: xnn_init_qs8_gemm_wasmsimd_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+- name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar
+  init: xnn_init_qs8_gemm_scalar_params
diff --git a/test/qs8-dwconv-minmax.cc b/test/qs8-dwconv-minmax.cc
deleted file mode 100644
index 6deb3e6..0000000
--- a/test/qs8-dwconv-minmax.cc
+++ /dev/null
@@ -1,14247 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates.
-// All rights reserved.
-//
-// Copyright 2019 Google LLC
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-//
-// Auto-generated file. Do not edit!
-//   Specification: test/qs8-dwconv-minmax.yaml
-//   Generator: tools/generate-dwconv-test.py
-
-
-#include <gtest/gtest.h>
-
-#include <xnnpack/common.h>
-#include <xnnpack/isa-checks.h>
-
-#include <xnnpack/dwconv.h>
-#include "dwconv-microkernel-tester.h"
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_eq_8) {
-    TEST_REQUIRES_ARM_NEON;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_lt_8) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, input_offset) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__NEON_MUL16, zero) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_eq_16) {
-    TEST_REQUIRES_ARM_NEON;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_lt_16) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, input_offset) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__NEON_MUL16, zero) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_eq_24) {
-    TEST_REQUIRES_ARM_NEON;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_lt_24) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, input_offset) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__NEON_MUL16, zero) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_eq_32) {
-    TEST_REQUIRES_ARM_NEON;
-    DWConvMicrokernelTester()
-      .cr(32)
-      .kr(9)
-      .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_div_32_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_lt_32) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 1; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, c_gt_32_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(32)
-        .width(5)
-        .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, input_offset) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__NEON_MUL16, zero) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 64; channels < 512; channels += 96) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(9)
-          .channels(channels)
-          .input_offset(592)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_eq_8) {
-    TEST_REQUIRES_X86_SSE2;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_lt_8) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE2_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_SSE2;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE2_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_eq_24) {
-    TEST_REQUIRES_X86_SSE2;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_lt_24) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE2_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_eq_8) {
-    TEST_REQUIRES_X86_SSSE3;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_lt_8) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSSE3_MUL16, zero) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_SSSE3;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSSE3_MUL16, zero) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_eq_24) {
-    TEST_REQUIRES_X86_SSSE3;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_lt_24) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSSE3_MUL16, zero) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_eq_8) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_lt_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_eq_24) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_lt_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_eq_8) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_div_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_lt_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_gt_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_eq_24) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_div_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_lt_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_gt_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_eq_32) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(32)
-      .kr(9)
-      .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_div_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_lt_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, c_gt_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(32)
-        .width(5)
-        .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 64; channels < 512; channels += 96) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(9)
-          .channels(channels)
-          .input_offset(592)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_eq_8) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_div_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_lt_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_gt_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__SSE41_MUL32, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__SSE41_MUL32, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_eq_24) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_div_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_lt_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_gt_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__SSE41_MUL32, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_eq_8) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_div_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_lt_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_gt_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_eq_24) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_div_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_lt_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_gt_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_eq_8) {
-    TEST_REQUIRES_X86_XOP;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_div_8) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_lt_8) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_gt_8) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, input_offset) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__XOP_MUL32, zero) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_XOP;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, input_offset) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__XOP_MUL32, zero) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_eq_24) {
-    TEST_REQUIRES_X86_XOP;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_div_24) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_lt_24) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_gt_24) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, input_offset) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__XOP_MUL32, zero) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_eq_8) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_lt_8) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__AVX2_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX2_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_eq_24) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_lt_24) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__AVX2_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_eq_32) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(32)
-      .kr(9)
-      .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_lt_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(32)
-        .width(5)
-        .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX2_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 64; channels < 512; channels += 96) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(9)
-          .channels(channels)
-          .input_offset(592)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__AVX512SKX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_eq_32) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    DWConvMicrokernelTester()
-      .cr(32)
-      .kr(9)
-      .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_div_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_lt_32) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 1; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, c_gt_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(32)
-        .width(5)
-        .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(9)
-        .channels(channels)
-        .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X9__AVX512SKX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 64; channels < 512; channels += 96) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(9)
-          .channels(channels)
-          .input_offset(592)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_WASMSIMD
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_eq_8) {
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(9)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8) {
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmin) {
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_div_8_with_qmax) {
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_lt_8) {
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8) {
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmin) {
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, c_gt_8_with_qmax) {
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_step) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_qmin) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, multipixel_with_qmax) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, input_offset) {
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(9)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X9__WASMSIMD_MUL16, zero) {
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(9)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_WASMSIMD
-
-
-#if XNN_ARCH_WASMSIMD
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_eq_16) {
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(9)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16) {
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmin) {
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_div_16_with_qmax) {
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_lt_16) {
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16) {
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmin) {
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, c_gt_16_with_qmax) {
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_step) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_qmin) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, multipixel_with_qmax) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, input_offset) {
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(9)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X9__WASMSIMD_MUL16, zero) {
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(9)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_WASMSIMD
-
-
-#if XNN_ARCH_WASMSIMD
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_eq_24) {
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(9)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24) {
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmin) {
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_div_24_with_qmax) {
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_lt_24) {
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24) {
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmin) {
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, c_gt_24_with_qmax) {
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_step) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 9; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_output_stride) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_qmin) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, multipixel_with_qmax) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, input_offset) {
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(9)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X9__WASMSIMD_MUL16, zero) {
-    for (uint32_t mz = 0; mz < 9; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(9)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_WASMSIMD
-
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, c_eq_1) {
-  DWConvMicrokernelTester()
-    .cr(1)
-    .kr(9)
-    .channels(1)
-    .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, c_gt_1) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, c_gt_1_with_qmin) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, c_gt_1_with_qmax) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, multipixel) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(1)
-      .width(5)
-      .output_stride(7)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, input_offset) {
-  for (uint32_t channels = 2; channels < 16; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(9)
-      .channels(channels)
-      .input_offset(48)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X9__SCALAR, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 2; channels < 16; channels += 3) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(9)
-        .channels(channels)
-        .input_offset(48)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, c_eq_2) {
-  DWConvMicrokernelTester()
-    .cr(2)
-    .kr(9)
-    .channels(2)
-    .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, c_div_2) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, c_div_2_with_qmin) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, c_div_2_with_qmax) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, c_lt_2) {
-  for (uint32_t channels = 1; channels < 2; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, c_gt_2) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, c_gt_2_with_qmin) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, c_gt_2_with_qmax) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, multipixel) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(2)
-      .width(5)
-      .output_stride(13)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, input_offset) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(9)
-      .channels(channels)
-      .input_offset(80)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X9__SCALAR, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 4; channels < 32; channels += 6) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(9)
-        .channels(channels)
-        .input_offset(80)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, c_eq_4) {
-  DWConvMicrokernelTester()
-    .cr(4)
-    .kr(9)
-    .channels(4)
-    .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, c_div_4) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, c_div_4_with_qmin) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, c_div_4_with_qmax) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, c_lt_4) {
-  for (uint32_t channels = 1; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, c_gt_4) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, c_gt_4_with_qmin) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, c_gt_4_with_qmax) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, multipixel) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    for (size_t step = 2; step <= 9; step++) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(9)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(4)
-      .width(5)
-      .output_stride(23)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, input_offset) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(9)
-      .channels(channels)
-      .input_offset(112)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X9__SCALAR, zero) {
-  for (uint32_t mz = 0; mz < 9; mz++) {
-    for (uint32_t channels = 8; channels < 64; channels += 12) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(9)
-        .channels(channels)
-        .input_offset(112)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, c_eq_8) {
-    TEST_REQUIRES_ARM_NEON;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, c_div_8) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, c_lt_8) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, c_gt_8) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, multipixel) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, input_offset) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__NEON_MUL16, zero) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, c_eq_16) {
-    TEST_REQUIRES_ARM_NEON;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, c_div_16) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, c_lt_16) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, c_gt_16) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, multipixel) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, input_offset) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__NEON_MUL16, zero) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, c_eq_24) {
-    TEST_REQUIRES_ARM_NEON;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, c_div_24) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, c_lt_24) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, c_gt_24) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, multipixel) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, input_offset) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__NEON_MUL16, zero) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, c_eq_32) {
-    TEST_REQUIRES_ARM_NEON;
-    DWConvMicrokernelTester()
-      .cr(32)
-      .kr(25)
-      .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, c_div_32) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, c_div_32_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, c_div_32_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, c_lt_32) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 1; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, c_gt_32) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, c_gt_32_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, c_gt_32_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, multipixel) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(32)
-        .width(5)
-        .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_ARM_NEON;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, input_offset) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__NEON_MUL16, zero) {
-    TEST_REQUIRES_ARM_NEON;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 64; channels < 512; channels += 96) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(25)
-          .channels(channels)
-          .input_offset(592)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16, xnn_init_qs8_gemm_neon_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, c_eq_8) {
-    TEST_REQUIRES_X86_SSE2;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, c_div_8) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, c_lt_8) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, c_gt_8) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE2_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_SSE2;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE2_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, c_eq_24) {
-    TEST_REQUIRES_X86_SSE2;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, c_div_24) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, c_lt_24) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, c_gt_24) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE2_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, c_eq_8) {
-    TEST_REQUIRES_X86_SSSE3;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, c_div_8) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, c_lt_8) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, c_gt_8) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSSE3_MUL16, zero) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_SSSE3;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSSE3_MUL16, zero) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, c_eq_24) {
-    TEST_REQUIRES_X86_SSSE3;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, c_div_24) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, c_lt_24) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, c_gt_24) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSSE3_MUL16, zero) {
-    TEST_REQUIRES_X86_SSSE3;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, c_eq_8) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, c_div_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, c_lt_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, c_gt_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, c_eq_24) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, c_div_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, c_lt_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, c_gt_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL16, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, c_eq_8) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, c_div_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, c_lt_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, c_gt_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, c_eq_24) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, c_div_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, c_lt_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, c_gt_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, c_eq_16) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, c_div_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, c_lt_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, c_gt_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, c_eq_32) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(32)
-      .kr(25)
-      .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, c_div_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, c_div_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, c_div_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, c_lt_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, c_gt_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, c_gt_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, c_gt_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(32)
-        .width(5)
-        .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL16, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 64; channels < 512; channels += 96) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(25)
-          .channels(channels)
-          .input_offset(592)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, c_eq_8) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, c_div_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, c_lt_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, c_gt_8) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__SSE41_MUL32, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__SSE41_MUL32, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, c_eq_24) {
-    TEST_REQUIRES_X86_SSE41;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, c_div_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, c_lt_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, c_gt_24) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, multipixel) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_SSE41;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, input_offset) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__SSE41_MUL32, zero) {
-    TEST_REQUIRES_X86_SSE41;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, c_eq_8) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, c_div_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, c_lt_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, c_gt_8) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, c_eq_24) {
-    TEST_REQUIRES_X86_AVX;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, c_div_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, c_lt_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, c_gt_24) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, c_eq_8) {
-    TEST_REQUIRES_X86_XOP;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, c_div_8) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, c_lt_8) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, c_gt_8) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, multipixel) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, input_offset) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__XOP_MUL32, zero) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_XOP;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, multipixel) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, input_offset) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__XOP_MUL32, zero) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, c_eq_24) {
-    TEST_REQUIRES_X86_XOP;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, c_div_24) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, c_lt_24) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, c_gt_24) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, multipixel) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_XOP;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, input_offset) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__XOP_MUL32, zero) {
-    TEST_REQUIRES_X86_XOP;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32, xnn_init_qs8_gemm_sse4_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, c_eq_8) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, c_div_8) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, c_lt_8) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, c_gt_8) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__AVX2_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX2_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, c_eq_24) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, c_div_24) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, c_lt_24) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, c_gt_24) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__AVX2_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, c_eq_32) {
-    TEST_REQUIRES_X86_AVX2;
-    DWConvMicrokernelTester()
-      .cr(32)
-      .kr(25)
-      .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, c_div_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, c_lt_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 1; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, c_gt_32) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(32)
-        .width(5)
-        .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX2;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX2_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX2;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 64; channels < 512; channels += 96) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(25)
-          .channels(channels)
-          .input_offset(592)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32, xnn_init_qs8_gemm_avx2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, c_eq_16) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, c_div_16) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, c_div_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, c_lt_16) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, c_gt_16) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, c_gt_16_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__AVX512SKX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, c_eq_32) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    DWConvMicrokernelTester()
-      .cr(32)
-      .kr(25)
-      .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, c_div_32) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, c_div_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, c_lt_32) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 1; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, c_gt_32) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, c_gt_32_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 33; channels < 64; channels++) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, multipixel) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, multipixel_with_step) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, multipixel_with_output_stride) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(32)
-        .width(5)
-        .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, multipixel_with_qmin) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, multipixel_with_qmax) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (size_t channels = 1; channels <= 160; channels += 31) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, input_offset) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t channels = 64; channels < 512; channels += 96) {
-      DWConvMicrokernelTester()
-        .cr(32)
-        .kr(25)
-        .channels(channels)
-        .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP32X25__AVX512SKX_MUL32, zero) {
-    TEST_REQUIRES_X86_AVX512SKX;
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 64; channels < 512; channels += 96) {
-        DWConvMicrokernelTester()
-          .cr(32)
-          .kr(25)
-          .channels(channels)
-          .input_offset(592)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_gemm_sse2_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
-
-
-#if XNN_ARCH_WASMSIMD
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, c_eq_8) {
-    DWConvMicrokernelTester()
-      .cr(8)
-      .kr(25)
-      .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, c_div_8) {
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmin) {
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, c_div_8_with_qmax) {
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, c_lt_8) {
-    for (uint32_t channels = 1; channels < 8; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, c_gt_8) {
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmin) {
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, c_gt_8_with_qmax) {
-    for (uint32_t channels = 9; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, multipixel) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, multipixel_with_step) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(8)
-        .width(5)
-        .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, multipixel_with_qmin) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, multipixel_with_qmax) {
-    for (size_t channels = 1; channels <= 40; channels += 7) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, input_offset) {
-    for (uint32_t channels = 16; channels < 128; channels += 24) {
-      DWConvMicrokernelTester()
-        .cr(8)
-        .kr(25)
-        .channels(channels)
-        .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP8X25__WASMSIMD_MUL16, zero) {
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 16; channels < 128; channels += 24) {
-        DWConvMicrokernelTester()
-          .cr(8)
-          .kr(25)
-          .channels(channels)
-          .input_offset(176)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_WASMSIMD
-
-
-#if XNN_ARCH_WASMSIMD
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, c_eq_16) {
-    DWConvMicrokernelTester()
-      .cr(16)
-      .kr(25)
-      .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, c_div_16) {
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmin) {
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, c_div_16_with_qmax) {
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, c_lt_16) {
-    for (uint32_t channels = 1; channels < 16; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, c_gt_16) {
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmin) {
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, c_gt_16_with_qmax) {
-    for (uint32_t channels = 17; channels < 32; channels++) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, multipixel) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, multipixel_with_step) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(16)
-        .width(5)
-        .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, multipixel_with_qmin) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, multipixel_with_qmax) {
-    for (size_t channels = 1; channels <= 80; channels += 15) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, input_offset) {
-    for (uint32_t channels = 32; channels < 256; channels += 48) {
-      DWConvMicrokernelTester()
-        .cr(16)
-        .kr(25)
-        .channels(channels)
-        .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP16X25__WASMSIMD_MUL16, zero) {
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 32; channels < 256; channels += 48) {
-        DWConvMicrokernelTester()
-          .cr(16)
-          .kr(25)
-          .channels(channels)
-          .input_offset(304)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_WASMSIMD
-
-
-#if XNN_ARCH_WASMSIMD
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, c_eq_24) {
-    DWConvMicrokernelTester()
-      .cr(24)
-      .kr(25)
-      .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, c_div_24) {
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmin) {
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, c_div_24_with_qmax) {
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, c_lt_24) {
-    for (uint32_t channels = 1; channels < 24; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, c_gt_24) {
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmin) {
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, c_gt_24_with_qmax) {
-    for (uint32_t channels = 25; channels < 48; channels++) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, multipixel) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, multipixel_with_step) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      for (size_t step = 2; step <= 25; step++) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .width(3)
-          .step(step)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, multipixel_with_output_stride) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(24)
-        .width(5)
-        .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, multipixel_with_qmin) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, multipixel_with_qmax) {
-    for (size_t channels = 1; channels <= 120; channels += 23) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, input_offset) {
-    for (uint32_t channels = 48; channels < 384; channels += 72) {
-      DWConvMicrokernelTester()
-        .cr(24)
-        .kr(25)
-        .channels(channels)
-        .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-    }
-  }
-
-  TEST(QS8_DWCONV_MINMAX_UP24X25__WASMSIMD_MUL16, zero) {
-    for (uint32_t mz = 0; mz < 25; mz++) {
-      for (uint32_t channels = 48; channels < 384; channels += 72) {
-        DWConvMicrokernelTester()
-          .cr(24)
-          .kr(25)
-          .channels(channels)
-          .input_offset(464)
-          .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_gemm_wasmsimd_params);
-      }
-    }
-  }
-#endif  // XNN_ARCH_WASMSIMD
-
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, c_eq_1) {
-  DWConvMicrokernelTester()
-    .cr(1)
-    .kr(25)
-    .channels(1)
-    .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, c_gt_1) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, c_gt_1_with_qmin) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, c_gt_1_with_qmax) {
-  for (uint32_t channels = 2; channels < 10; channels++) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, multipixel) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(1)
-      .width(5)
-      .output_stride(7)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 5; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, input_offset) {
-  for (uint32_t channels = 2; channels < 16; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(1)
-      .kr(25)
-      .channels(channels)
-      .input_offset(48)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP1X25__SCALAR, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 2; channels < 16; channels += 3) {
-      DWConvMicrokernelTester()
-        .cr(1)
-        .kr(25)
-        .channels(channels)
-        .input_offset(48)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, c_eq_2) {
-  DWConvMicrokernelTester()
-    .cr(2)
-    .kr(25)
-    .channels(2)
-    .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, c_div_2) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, c_div_2_with_qmin) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, c_div_2_with_qmax) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, c_lt_2) {
-  for (uint32_t channels = 1; channels < 2; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, c_gt_2) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, c_gt_2_with_qmin) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, c_gt_2_with_qmax) {
-  for (uint32_t channels = 3; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, multipixel) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(2)
-      .width(5)
-      .output_stride(13)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 10; channels += 1) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, input_offset) {
-  for (uint32_t channels = 4; channels < 32; channels += 6) {
-    DWConvMicrokernelTester()
-      .cr(2)
-      .kr(25)
-      .channels(channels)
-      .input_offset(80)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP2X25__SCALAR, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 4; channels < 32; channels += 6) {
-      DWConvMicrokernelTester()
-        .cr(2)
-        .kr(25)
-        .channels(channels)
-        .input_offset(80)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, c_eq_4) {
-  DWConvMicrokernelTester()
-    .cr(4)
-    .kr(25)
-    .channels(4)
-    .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, c_div_4) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, c_div_4_with_qmin) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, c_div_4_with_qmax) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, c_lt_4) {
-  for (uint32_t channels = 1; channels < 4; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, c_gt_4) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, c_gt_4_with_qmin) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, c_gt_4_with_qmax) {
-  for (uint32_t channels = 5; channels < 8; channels++) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, multipixel) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, multipixel_with_step) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    for (size_t step = 2; step <= 25; step++) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(25)
-        .channels(channels)
-        .width(3)
-        .step(step)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, multipixel_with_output_stride) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(4)
-      .width(5)
-      .output_stride(23)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, multipixel_with_qmin) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, multipixel_with_qmax) {
-  for (size_t channels = 1; channels <= 20; channels += 3) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .width(3)
-      .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, input_offset) {
-  for (uint32_t channels = 8; channels < 64; channels += 12) {
-    DWConvMicrokernelTester()
-      .cr(4)
-      .kr(25)
-      .channels(channels)
-      .input_offset(112)
-      .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-  }
-}
-
-TEST(QS8_DWCONV_MINMAX_UP4X25__SCALAR, zero) {
-  for (uint32_t mz = 0; mz < 25; mz++) {
-    for (uint32_t channels = 8; channels < 64; channels += 12) {
-      DWConvMicrokernelTester()
-        .cr(4)
-        .kr(25)
-        .channels(channels)
-        .input_offset(112)
-        .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar, xnn_init_qs8_gemm_scalar_params);
-    }
-  }
-}
\ No newline at end of file
diff --git a/test/qs8-dwconv-minmax.yaml b/test/qs8-dwconv-minmax.yaml
deleted file mode 100644
index 3007ce0..0000000
--- a/test/qs8-dwconv-minmax.yaml
+++ /dev/null
@@ -1,160 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16
-  init: xnn_init_qs8_gemm_neon_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16
-  init: xnn_init_qs8_gemm_neon_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16
-  init: xnn_init_qs8_gemm_neon_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16
-  init: xnn_init_qs8_gemm_neon_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__sse2_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__sse2_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__ssse3_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__avx_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__avx_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__avx_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__xop_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__xop_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__xop_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__avx2_mul32
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul32
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__avx2_mul32
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul32
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16
-  init: xnn_init_qs8_gemm_wasmsimd_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16
-  init: xnn_init_qs8_gemm_wasmsimd_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16
-  init: xnn_init_qs8_gemm_wasmsimd_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up1x9__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up2x9__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up4x9__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__neon_mul16
-  init: xnn_init_qs8_gemm_neon_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__neon_mul16
-  init: xnn_init_qs8_gemm_neon_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__neon_mul16
-  init: xnn_init_qs8_gemm_neon_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up32x25__neon_mul16
-  init: xnn_init_qs8_gemm_neon_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__sse2_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__sse2_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__sse2_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__ssse3_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__ssse3_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__ssse3_mul16
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul16
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul16
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul16
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__sse41_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__sse41_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__sse41_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__avx_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__avx_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__avx_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__xop_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__xop_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__xop_mul32
-  init: xnn_init_qs8_gemm_sse4_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__avx2_mul32
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__avx2_mul32
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__avx2_mul32
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up32x25__avx2_mul32
-  init: xnn_init_qs8_gemm_avx2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__avx512skx_mul32
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up32x25__avx512skx_mul32
-  init: xnn_init_qs8_gemm_sse2_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up8x25__wasmsimd_mul16
-  init: xnn_init_qs8_gemm_wasmsimd_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up16x25__wasmsimd_mul16
-  init: xnn_init_qs8_gemm_wasmsimd_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up24x25__wasmsimd_mul16
-  init: xnn_init_qs8_gemm_wasmsimd_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up1x25__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up2x25__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-- name: xnn_qs8_dwconv_minmax_ukernel_up4x25__scalar
-  init: xnn_init_qs8_gemm_scalar_params
diff --git a/test/qs8-gemm-minmax.cc b/test/qs8-gemm-minmax-gemmlowp.cc
similarity index 61%
rename from test/qs8-gemm-minmax.cc
rename to test/qs8-gemm-minmax-gemmlowp.cc
index 4f56544..a7341fb 100644
--- a/test/qs8-gemm-minmax.cc
+++ b/test/qs8-gemm-minmax-gemmlowp.cc
@@ -7,7 +7,7 @@
 // LICENSE file in the root directory of this source tree.
 //
 // Auto-generated file. Do not edit!
-//   Specification: test/qs8-gemm-minmax.yaml
+//   Specification: test/qs8-gemm-minmax-gemmlowp.yaml
 //   Generator: tools/generate-gemm-test.py
 
 
@@ -23,7 +23,7 @@
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -33,10 +33,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -47,10 +47,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -61,10 +61,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -77,12 +77,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -94,11 +94,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -110,11 +110,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -125,11 +125,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -141,11 +141,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -159,13 +159,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -176,11 +176,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -192,11 +192,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -210,13 +210,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -227,11 +227,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -243,11 +243,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -261,13 +261,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -279,12 +279,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -297,12 +297,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -315,12 +315,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -334,13 +334,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -352,12 +352,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -370,12 +370,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -388,12 +388,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -407,13 +407,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -428,13 +428,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -445,10 +445,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -459,10 +459,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -473,13 +473,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -489,10 +489,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -503,10 +503,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -517,10 +517,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -533,12 +533,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -550,11 +550,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -566,11 +566,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -581,11 +581,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -597,11 +597,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -615,13 +615,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -632,11 +632,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -648,11 +648,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -666,13 +666,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -683,11 +683,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -699,11 +699,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -717,13 +717,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -735,12 +735,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -753,12 +753,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -771,12 +771,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -790,13 +790,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -808,12 +808,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -826,12 +826,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -844,12 +844,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -863,13 +863,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -884,13 +884,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -901,10 +901,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -915,10 +915,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -929,13 +929,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -945,10 +945,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -959,10 +959,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -973,10 +973,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -989,12 +989,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -1006,11 +1006,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -1022,11 +1022,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1037,11 +1037,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1053,11 +1053,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1071,13 +1071,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -1088,11 +1088,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -1104,11 +1104,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1122,13 +1122,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -1139,11 +1139,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -1155,11 +1155,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1173,13 +1173,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1191,12 +1191,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1209,12 +1209,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1227,12 +1227,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1246,13 +1246,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1264,12 +1264,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1282,12 +1282,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1300,12 +1300,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1319,13 +1319,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1340,13 +1340,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1357,10 +1357,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1371,10 +1371,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1385,13 +1385,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1401,10 +1401,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1415,10 +1415,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1429,10 +1429,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -1445,12 +1445,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -1462,11 +1462,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -1478,11 +1478,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1493,11 +1493,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1509,11 +1509,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1527,13 +1527,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -1544,11 +1544,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -1560,11 +1560,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1578,13 +1578,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -1595,11 +1595,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -1611,11 +1611,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1629,13 +1629,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1647,12 +1647,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1665,12 +1665,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1683,12 +1683,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1702,13 +1702,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1720,12 +1720,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1738,12 +1738,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1756,12 +1756,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1775,13 +1775,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1796,13 +1796,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1813,10 +1813,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1827,10 +1827,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1841,13 +1841,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1857,10 +1857,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1871,10 +1871,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1885,10 +1885,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -1901,12 +1901,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -1918,11 +1918,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -1934,11 +1934,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1949,11 +1949,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1965,11 +1965,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1983,13 +1983,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2000,11 +2000,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2016,11 +2016,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2034,13 +2034,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2051,11 +2051,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2067,11 +2067,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2085,13 +2085,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2103,12 +2103,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2121,12 +2121,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2139,12 +2139,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2158,13 +2158,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2176,12 +2176,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2194,12 +2194,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2212,12 +2212,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2231,13 +2231,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2252,13 +2252,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2269,10 +2269,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2283,10 +2283,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2297,13 +2297,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2313,10 +2313,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2327,10 +2327,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2341,10 +2341,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -2357,12 +2357,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -2374,11 +2374,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -2390,11 +2390,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -2405,11 +2405,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -2421,11 +2421,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2439,13 +2439,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2456,11 +2456,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2472,11 +2472,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2490,13 +2490,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2507,11 +2507,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2523,11 +2523,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2541,13 +2541,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2559,12 +2559,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2577,12 +2577,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2595,12 +2595,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2614,13 +2614,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2632,12 +2632,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2650,12 +2650,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2668,12 +2668,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2687,13 +2687,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2708,13 +2708,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2725,10 +2725,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2739,10 +2739,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2753,13 +2753,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -2769,10 +2769,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -2783,10 +2783,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -2797,10 +2797,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -2813,12 +2813,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -2830,11 +2830,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -2846,11 +2846,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -2861,11 +2861,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -2877,11 +2877,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -2895,13 +2895,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2912,11 +2912,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2928,11 +2928,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -2946,13 +2946,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2963,11 +2963,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2979,11 +2979,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -2997,13 +2997,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3015,12 +3015,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3033,12 +3033,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3051,12 +3051,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3070,13 +3070,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3088,12 +3088,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3106,12 +3106,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3124,12 +3124,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3143,13 +3143,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3164,13 +3164,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3181,10 +3181,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3195,10 +3195,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3209,13 +3209,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3225,10 +3225,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3239,10 +3239,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3253,10 +3253,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -3269,12 +3269,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -3286,11 +3286,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -3302,11 +3302,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -3317,11 +3317,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -3333,11 +3333,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3351,13 +3351,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -3368,11 +3368,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -3384,11 +3384,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3402,13 +3402,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -3419,11 +3419,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -3435,11 +3435,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3453,13 +3453,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3471,12 +3471,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3489,12 +3489,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3507,12 +3507,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3526,13 +3526,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3544,12 +3544,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3562,12 +3562,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3580,12 +3580,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3599,13 +3599,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3620,13 +3620,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3637,10 +3637,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3651,10 +3651,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3665,13 +3665,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3681,10 +3681,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3695,10 +3695,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3709,10 +3709,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -3725,12 +3725,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -3742,11 +3742,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -3758,11 +3758,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -3773,11 +3773,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -3789,11 +3789,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3807,13 +3807,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -3824,11 +3824,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -3840,11 +3840,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3858,13 +3858,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -3875,11 +3875,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -3891,11 +3891,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3909,13 +3909,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3927,12 +3927,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3945,12 +3945,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3963,12 +3963,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3982,13 +3982,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4000,12 +4000,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4018,12 +4018,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4036,12 +4036,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4055,13 +4055,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4076,13 +4076,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4093,10 +4093,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4107,10 +4107,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4121,13 +4121,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4137,10 +4137,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4151,10 +4151,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4165,10 +4165,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -4181,12 +4181,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -4198,11 +4198,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -4214,11 +4214,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -4229,11 +4229,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -4245,11 +4245,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4263,13 +4263,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -4280,11 +4280,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -4296,11 +4296,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4314,13 +4314,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -4331,11 +4331,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -4347,11 +4347,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4365,13 +4365,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4383,12 +4383,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4401,12 +4401,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4419,12 +4419,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4438,13 +4438,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4456,12 +4456,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4474,12 +4474,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4492,12 +4492,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4511,13 +4511,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4532,13 +4532,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4549,10 +4549,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4563,10 +4563,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4577,13 +4577,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4593,10 +4593,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4607,10 +4607,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4621,10 +4621,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -4637,12 +4637,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -4654,11 +4654,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -4670,11 +4670,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -4685,11 +4685,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -4701,11 +4701,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4719,13 +4719,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -4736,11 +4736,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -4752,11 +4752,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4770,13 +4770,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -4787,11 +4787,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -4803,11 +4803,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4821,13 +4821,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -4839,12 +4839,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -4857,12 +4857,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -4875,12 +4875,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -4894,13 +4894,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -4912,12 +4912,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -4930,12 +4930,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -4948,12 +4948,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -4967,13 +4967,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4988,13 +4988,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5005,10 +5005,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5019,10 +5019,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AARCH64_NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5033,13 +5033,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5049,10 +5049,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5063,10 +5063,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5077,10 +5077,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -5093,12 +5093,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -5110,11 +5110,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -5126,11 +5126,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -5141,11 +5141,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -5157,11 +5157,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5175,13 +5175,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -5192,11 +5192,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -5208,11 +5208,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5226,13 +5226,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -5243,11 +5243,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -5259,11 +5259,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5277,13 +5277,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5295,12 +5295,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5313,12 +5313,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5331,12 +5331,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5350,13 +5350,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5368,12 +5368,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5386,12 +5386,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5404,12 +5404,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5423,13 +5423,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5444,13 +5444,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5461,10 +5461,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5475,10 +5475,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5489,13 +5489,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5505,10 +5505,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5519,10 +5519,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5533,10 +5533,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -5549,12 +5549,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -5566,11 +5566,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -5582,11 +5582,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -5597,11 +5597,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -5613,11 +5613,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5631,13 +5631,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -5648,11 +5648,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -5664,11 +5664,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5682,13 +5682,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -5699,11 +5699,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -5715,11 +5715,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5733,13 +5733,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5751,12 +5751,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5769,12 +5769,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5787,12 +5787,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5806,13 +5806,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5824,12 +5824,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5842,12 +5842,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5860,12 +5860,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5879,13 +5879,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5900,13 +5900,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5917,10 +5917,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5931,10 +5931,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5945,13 +5945,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -5961,10 +5961,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -5975,10 +5975,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -5989,10 +5989,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -6005,12 +6005,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -6022,11 +6022,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -6038,11 +6038,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -6053,11 +6053,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -6069,11 +6069,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6087,13 +6087,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -6104,11 +6104,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -6120,11 +6120,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6138,13 +6138,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -6155,11 +6155,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -6171,11 +6171,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6189,13 +6189,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6207,12 +6207,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6225,12 +6225,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6243,12 +6243,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6262,13 +6262,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6280,12 +6280,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6298,12 +6298,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6316,12 +6316,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6335,13 +6335,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6356,13 +6356,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -6373,10 +6373,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -6387,10 +6387,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -6401,13 +6401,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -6417,10 +6417,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -6431,10 +6431,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -6445,10 +6445,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -6461,12 +6461,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -6478,11 +6478,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -6494,11 +6494,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -6509,11 +6509,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -6525,11 +6525,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6543,13 +6543,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -6560,11 +6560,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -6576,11 +6576,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6594,13 +6594,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -6611,11 +6611,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -6627,11 +6627,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6645,13 +6645,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6663,12 +6663,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6681,12 +6681,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6699,12 +6699,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6718,13 +6718,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6736,12 +6736,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6754,12 +6754,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6772,12 +6772,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6791,13 +6791,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6812,13 +6812,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -6829,10 +6829,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -6843,10 +6843,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -6857,13 +6857,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -6873,10 +6873,10 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -6887,10 +6887,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -6901,10 +6901,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -6917,12 +6917,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -6934,11 +6934,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -6950,11 +6950,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -6965,11 +6965,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -6981,11 +6981,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -6999,13 +6999,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -7016,11 +7016,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -7032,11 +7032,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -7050,13 +7050,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -7067,11 +7067,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -7083,11 +7083,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -7101,13 +7101,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7119,12 +7119,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7137,12 +7137,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7155,12 +7155,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7174,13 +7174,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7192,12 +7192,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7210,12 +7210,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7228,12 +7228,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7247,13 +7247,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -7268,13 +7268,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -7285,10 +7285,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -7299,10 +7299,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -7313,13 +7313,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7329,10 +7329,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7343,10 +7343,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7357,10 +7357,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -7373,12 +7373,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -7390,11 +7390,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -7406,11 +7406,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -7421,11 +7421,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -7437,11 +7437,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7455,13 +7455,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -7472,11 +7472,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -7488,11 +7488,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7506,13 +7506,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -7523,11 +7523,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -7539,11 +7539,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7557,13 +7557,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7575,12 +7575,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7593,12 +7593,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7611,12 +7611,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7630,13 +7630,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7648,12 +7648,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7666,12 +7666,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7684,12 +7684,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7703,13 +7703,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7724,13 +7724,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7741,10 +7741,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7755,10 +7755,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7769,13 +7769,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -7785,10 +7785,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -7799,10 +7799,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -7813,10 +7813,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -7829,12 +7829,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -7846,11 +7846,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -7862,11 +7862,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -7877,11 +7877,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -7893,11 +7893,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -7911,13 +7911,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -7928,11 +7928,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -7944,11 +7944,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -7962,13 +7962,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -7979,11 +7979,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -7995,11 +7995,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -8013,13 +8013,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8031,12 +8031,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8049,12 +8049,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8067,12 +8067,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8086,13 +8086,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8104,12 +8104,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8122,12 +8122,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8140,12 +8140,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8159,13 +8159,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -8180,13 +8180,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -8197,10 +8197,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -8211,10 +8211,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -8225,13 +8225,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8241,10 +8241,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8255,10 +8255,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8269,10 +8269,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -8285,12 +8285,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -8302,11 +8302,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -8318,11 +8318,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -8333,11 +8333,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -8349,11 +8349,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8367,13 +8367,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -8384,11 +8384,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -8400,11 +8400,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8418,13 +8418,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -8435,11 +8435,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -8451,11 +8451,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8469,13 +8469,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8487,12 +8487,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8505,12 +8505,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8523,12 +8523,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8542,13 +8542,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8560,12 +8560,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8578,12 +8578,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8596,12 +8596,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8615,13 +8615,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8636,13 +8636,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8653,10 +8653,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8667,10 +8667,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8681,13 +8681,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -8697,10 +8697,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -8711,10 +8711,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -8725,10 +8725,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -8741,12 +8741,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -8758,11 +8758,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -8774,11 +8774,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -8789,11 +8789,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -8805,11 +8805,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -8823,13 +8823,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -8840,11 +8840,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -8856,11 +8856,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -8874,13 +8874,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -8891,11 +8891,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -8907,11 +8907,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -8925,13 +8925,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8943,12 +8943,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8961,12 +8961,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8979,12 +8979,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8998,13 +8998,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9016,12 +9016,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9034,12 +9034,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9052,12 +9052,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9071,13 +9071,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -9092,13 +9092,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -9109,10 +9109,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -9123,10 +9123,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -9137,13 +9137,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9153,10 +9153,10 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9167,10 +9167,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9181,10 +9181,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -9197,12 +9197,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -9214,11 +9214,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -9230,11 +9230,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -9245,11 +9245,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -9261,11 +9261,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9279,13 +9279,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -9296,11 +9296,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -9312,11 +9312,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9330,13 +9330,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -9347,11 +9347,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -9363,11 +9363,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9381,13 +9381,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9399,12 +9399,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9417,12 +9417,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9435,12 +9435,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9454,13 +9454,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9472,12 +9472,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9490,12 +9490,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9508,12 +9508,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9527,13 +9527,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9548,13 +9548,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9565,10 +9565,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9579,10 +9579,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9593,13 +9593,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -9609,10 +9609,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -9623,10 +9623,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -9637,10 +9637,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -9653,12 +9653,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -9670,11 +9670,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -9686,11 +9686,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -9701,11 +9701,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -9717,11 +9717,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -9735,13 +9735,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -9752,11 +9752,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -9768,11 +9768,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -9786,13 +9786,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -9803,11 +9803,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -9819,11 +9819,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -9837,13 +9837,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9855,12 +9855,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9873,12 +9873,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9891,12 +9891,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9910,13 +9910,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9928,12 +9928,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9946,12 +9946,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9964,12 +9964,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9983,13 +9983,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -10004,13 +10004,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -10021,10 +10021,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -10035,10 +10035,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -10049,13 +10049,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10065,10 +10065,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10079,10 +10079,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10093,10 +10093,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -10109,12 +10109,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -10126,11 +10126,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -10142,11 +10142,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -10157,11 +10157,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -10173,11 +10173,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10191,13 +10191,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -10208,11 +10208,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -10224,11 +10224,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10242,13 +10242,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -10259,11 +10259,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -10275,11 +10275,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10293,13 +10293,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10311,12 +10311,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10329,12 +10329,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10347,12 +10347,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10366,13 +10366,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10384,12 +10384,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10402,12 +10402,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10420,12 +10420,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10439,13 +10439,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10460,13 +10460,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10477,10 +10477,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10491,10 +10491,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10505,13 +10505,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -10521,10 +10521,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -10535,10 +10535,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -10549,10 +10549,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -10565,12 +10565,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -10582,11 +10582,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -10598,11 +10598,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -10613,11 +10613,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -10629,11 +10629,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -10647,13 +10647,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -10664,11 +10664,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -10680,11 +10680,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -10698,13 +10698,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -10715,11 +10715,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -10731,11 +10731,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -10749,13 +10749,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10767,12 +10767,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10785,12 +10785,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10803,12 +10803,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10822,13 +10822,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10840,12 +10840,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10858,12 +10858,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10876,12 +10876,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10895,13 +10895,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -10916,13 +10916,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -10933,10 +10933,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -10947,10 +10947,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -10961,13 +10961,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -10977,10 +10977,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -10991,10 +10991,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11005,10 +11005,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -11021,12 +11021,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -11038,11 +11038,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -11054,11 +11054,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -11069,11 +11069,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -11085,11 +11085,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11103,13 +11103,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -11120,11 +11120,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -11136,11 +11136,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11154,13 +11154,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -11171,11 +11171,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -11187,11 +11187,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11205,13 +11205,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11223,12 +11223,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11241,12 +11241,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11259,12 +11259,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11278,13 +11278,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11296,12 +11296,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11314,12 +11314,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11332,12 +11332,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11351,13 +11351,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11372,13 +11372,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11389,10 +11389,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11403,10 +11403,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11417,13 +11417,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -11433,10 +11433,10 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -11447,10 +11447,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -11461,10 +11461,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -11477,12 +11477,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -11494,11 +11494,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -11510,11 +11510,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -11525,11 +11525,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -11541,11 +11541,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -11559,13 +11559,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -11576,11 +11576,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -11592,11 +11592,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -11610,13 +11610,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -11627,11 +11627,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -11643,11 +11643,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -11661,13 +11661,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11679,12 +11679,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11697,12 +11697,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11715,12 +11715,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11734,13 +11734,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11752,12 +11752,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11770,12 +11770,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11788,12 +11788,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11807,13 +11807,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -11828,13 +11828,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -11845,10 +11845,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -11859,10 +11859,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -11873,13 +11873,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -11889,10 +11889,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -11903,10 +11903,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -11917,10 +11917,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -11933,12 +11933,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -11950,11 +11950,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -11966,11 +11966,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -11981,11 +11981,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -11997,11 +11997,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12015,13 +12015,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -12032,11 +12032,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -12048,11 +12048,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12066,13 +12066,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -12083,11 +12083,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -12099,11 +12099,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12117,13 +12117,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12135,12 +12135,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12153,12 +12153,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12171,12 +12171,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12190,13 +12190,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12208,12 +12208,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12226,12 +12226,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12244,12 +12244,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12263,13 +12263,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12284,13 +12284,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -12301,10 +12301,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -12315,10 +12315,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -12329,13 +12329,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -12345,10 +12345,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -12359,10 +12359,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -12373,10 +12373,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -12389,12 +12389,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -12406,11 +12406,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -12422,11 +12422,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -12437,11 +12437,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -12453,11 +12453,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -12471,13 +12471,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -12488,11 +12488,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -12504,11 +12504,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -12522,13 +12522,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -12539,11 +12539,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -12555,11 +12555,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -12573,13 +12573,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12591,12 +12591,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12609,12 +12609,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12627,12 +12627,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12646,13 +12646,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12664,12 +12664,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12682,12 +12682,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12700,12 +12700,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12719,13 +12719,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -12740,13 +12740,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -12757,10 +12757,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -12771,10 +12771,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -12785,13 +12785,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -12801,10 +12801,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -12815,10 +12815,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -12829,10 +12829,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -12845,12 +12845,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -12862,11 +12862,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -12878,11 +12878,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -12893,11 +12893,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -12909,11 +12909,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -12927,13 +12927,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -12944,11 +12944,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -12960,11 +12960,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -12978,13 +12978,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -12995,11 +12995,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -13011,11 +13011,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -13029,13 +13029,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13047,12 +13047,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13065,12 +13065,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13083,12 +13083,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13102,13 +13102,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13120,12 +13120,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13138,12 +13138,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13156,12 +13156,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13175,13 +13175,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -13196,13 +13196,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -13213,10 +13213,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -13227,10 +13227,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -13241,13 +13241,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -13257,10 +13257,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -13271,10 +13271,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -13285,10 +13285,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -13301,12 +13301,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -13318,11 +13318,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -13334,11 +13334,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -13349,11 +13349,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -13365,11 +13365,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13383,13 +13383,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -13400,11 +13400,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -13416,11 +13416,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13434,13 +13434,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -13451,11 +13451,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -13467,11 +13467,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13485,13 +13485,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13503,12 +13503,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13521,12 +13521,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13539,12 +13539,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13558,13 +13558,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13576,12 +13576,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13594,12 +13594,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13612,12 +13612,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13631,13 +13631,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13652,13 +13652,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -13669,10 +13669,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -13683,10 +13683,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -13697,13 +13697,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -13713,10 +13713,10 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -13727,10 +13727,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -13741,10 +13741,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -13757,12 +13757,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -13774,11 +13774,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -13790,11 +13790,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -13805,11 +13805,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -13821,11 +13821,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -13839,13 +13839,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -13856,11 +13856,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -13872,11 +13872,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -13890,13 +13890,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -13907,11 +13907,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -13923,11 +13923,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -13941,13 +13941,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13959,12 +13959,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13977,12 +13977,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13995,12 +13995,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14014,13 +14014,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14032,12 +14032,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14050,12 +14050,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14068,12 +14068,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14087,13 +14087,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -14108,13 +14108,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -14125,10 +14125,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -14139,10 +14139,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -14153,13 +14153,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14169,10 +14169,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14183,10 +14183,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14197,10 +14197,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -14213,12 +14213,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -14230,11 +14230,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -14246,11 +14246,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -14261,11 +14261,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -14277,11 +14277,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14295,13 +14295,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -14312,11 +14312,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -14328,11 +14328,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14346,13 +14346,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -14363,11 +14363,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -14379,11 +14379,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14397,13 +14397,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14415,12 +14415,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14433,12 +14433,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14451,12 +14451,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14470,13 +14470,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14488,12 +14488,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14506,12 +14506,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14524,12 +14524,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14543,13 +14543,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14564,13 +14564,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14581,10 +14581,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14595,10 +14595,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14609,13 +14609,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -14625,10 +14625,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -14639,10 +14639,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -14653,10 +14653,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -14669,12 +14669,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -14686,11 +14686,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -14702,11 +14702,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -14717,11 +14717,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -14733,11 +14733,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -14751,13 +14751,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -14768,11 +14768,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -14784,11 +14784,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -14802,13 +14802,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -14819,11 +14819,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -14835,11 +14835,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -14853,13 +14853,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14871,12 +14871,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14889,12 +14889,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14907,12 +14907,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14926,13 +14926,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14944,12 +14944,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14962,12 +14962,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14980,12 +14980,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14999,13 +14999,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -15020,13 +15020,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -15037,10 +15037,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -15051,10 +15051,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -15065,13 +15065,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15081,10 +15081,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15095,10 +15095,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15109,10 +15109,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -15125,12 +15125,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -15142,11 +15142,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -15158,11 +15158,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -15173,11 +15173,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -15189,11 +15189,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15207,13 +15207,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -15224,11 +15224,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -15240,11 +15240,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15258,13 +15258,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -15275,11 +15275,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -15291,11 +15291,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15309,13 +15309,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15327,12 +15327,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15345,12 +15345,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15363,12 +15363,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15382,13 +15382,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15400,12 +15400,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15418,12 +15418,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15436,12 +15436,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15455,13 +15455,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15476,13 +15476,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15493,10 +15493,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15507,10 +15507,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15521,13 +15521,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -15537,10 +15537,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -15551,10 +15551,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -15565,10 +15565,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -15581,12 +15581,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -15598,11 +15598,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -15614,11 +15614,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -15629,11 +15629,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -15645,11 +15645,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -15663,13 +15663,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -15680,11 +15680,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -15696,11 +15696,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -15714,13 +15714,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -15731,11 +15731,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -15747,11 +15747,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -15765,13 +15765,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15783,12 +15783,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15801,12 +15801,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15819,12 +15819,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15838,13 +15838,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15856,12 +15856,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15874,12 +15874,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15892,12 +15892,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15911,13 +15911,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -15932,13 +15932,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -15949,10 +15949,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -15963,10 +15963,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -15977,13 +15977,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -15993,10 +15993,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16007,10 +16007,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16021,10 +16021,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -16037,12 +16037,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -16054,11 +16054,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -16070,11 +16070,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -16085,11 +16085,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -16101,11 +16101,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16119,13 +16119,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -16136,11 +16136,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -16152,11 +16152,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16170,13 +16170,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -16187,11 +16187,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -16203,11 +16203,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16221,13 +16221,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16239,12 +16239,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16257,12 +16257,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16275,12 +16275,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16294,13 +16294,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16312,12 +16312,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16330,12 +16330,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16348,12 +16348,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16367,13 +16367,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16388,13 +16388,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16405,10 +16405,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16419,10 +16419,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16433,13 +16433,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -16449,10 +16449,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -16463,10 +16463,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -16477,10 +16477,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -16493,12 +16493,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -16510,11 +16510,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -16526,11 +16526,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -16541,11 +16541,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -16557,11 +16557,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -16575,13 +16575,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -16592,11 +16592,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -16608,11 +16608,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -16626,13 +16626,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -16643,11 +16643,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -16659,11 +16659,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -16677,13 +16677,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16695,12 +16695,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16713,12 +16713,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16731,12 +16731,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16750,13 +16750,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16768,12 +16768,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16786,12 +16786,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16804,12 +16804,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16823,13 +16823,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -16844,13 +16844,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -16861,10 +16861,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -16875,10 +16875,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -16889,13 +16889,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -16905,10 +16905,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -16919,10 +16919,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -16933,10 +16933,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -16949,12 +16949,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -16966,11 +16966,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -16982,11 +16982,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -16997,11 +16997,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -17013,11 +17013,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17031,13 +17031,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -17048,11 +17048,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -17064,11 +17064,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17082,13 +17082,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -17099,11 +17099,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -17115,11 +17115,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17133,13 +17133,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17151,12 +17151,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17169,12 +17169,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17187,12 +17187,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17206,13 +17206,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17224,12 +17224,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17242,12 +17242,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17260,12 +17260,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17279,13 +17279,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17300,13 +17300,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -17317,10 +17317,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -17331,10 +17331,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -17345,13 +17345,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -17361,10 +17361,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -17375,10 +17375,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -17389,10 +17389,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -17405,12 +17405,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -17422,11 +17422,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -17438,11 +17438,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -17453,11 +17453,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -17469,11 +17469,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -17487,13 +17487,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -17504,11 +17504,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -17520,11 +17520,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -17538,13 +17538,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -17555,11 +17555,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -17571,11 +17571,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -17589,13 +17589,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17607,12 +17607,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17625,12 +17625,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17643,12 +17643,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17662,13 +17662,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17680,12 +17680,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17698,12 +17698,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17716,12 +17716,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17735,13 +17735,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -17756,13 +17756,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -17773,10 +17773,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -17787,10 +17787,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -17801,13 +17801,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -17817,10 +17817,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -17831,10 +17831,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -17845,10 +17845,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -17861,12 +17861,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -17878,11 +17878,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -17894,11 +17894,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -17909,11 +17909,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -17925,11 +17925,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -17943,13 +17943,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -17960,11 +17960,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -17976,11 +17976,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -17994,13 +17994,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -18011,11 +18011,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -18027,11 +18027,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -18045,13 +18045,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18063,12 +18063,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18081,12 +18081,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18099,12 +18099,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18118,13 +18118,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18136,12 +18136,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18154,12 +18154,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18172,12 +18172,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18191,13 +18191,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -18212,13 +18212,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -18229,10 +18229,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -18243,10 +18243,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -18257,13 +18257,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -18273,10 +18273,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -18287,10 +18287,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -18301,10 +18301,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -18317,12 +18317,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -18334,11 +18334,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -18350,11 +18350,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -18365,11 +18365,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -18381,11 +18381,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -18399,13 +18399,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -18416,11 +18416,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -18432,11 +18432,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -18450,13 +18450,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -18467,11 +18467,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -18483,11 +18483,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -18501,13 +18501,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18519,12 +18519,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18537,12 +18537,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18555,12 +18555,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18574,13 +18574,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18592,12 +18592,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18610,12 +18610,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18628,12 +18628,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18647,13 +18647,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -18668,13 +18668,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -18685,10 +18685,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -18699,10 +18699,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -18713,13 +18713,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -18729,10 +18729,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -18743,10 +18743,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -18757,10 +18757,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -18773,12 +18773,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -18790,11 +18790,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -18806,11 +18806,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -18821,11 +18821,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -18837,11 +18837,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -18855,13 +18855,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -18872,11 +18872,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -18888,11 +18888,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -18906,13 +18906,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -18923,11 +18923,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -18939,11 +18939,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -18957,13 +18957,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18975,12 +18975,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18993,12 +18993,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19011,12 +19011,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19030,13 +19030,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19048,12 +19048,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19066,12 +19066,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19084,12 +19084,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19103,13 +19103,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -19124,13 +19124,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -19141,10 +19141,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -19155,10 +19155,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -19169,13 +19169,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -19185,10 +19185,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -19199,10 +19199,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -19213,10 +19213,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -19229,12 +19229,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -19246,11 +19246,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -19262,11 +19262,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -19277,11 +19277,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -19293,11 +19293,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -19311,13 +19311,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -19328,11 +19328,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -19344,11 +19344,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -19362,13 +19362,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -19379,11 +19379,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -19395,11 +19395,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -19413,13 +19413,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19431,12 +19431,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19449,12 +19449,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19467,12 +19467,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19486,13 +19486,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19504,12 +19504,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19522,12 +19522,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19540,12 +19540,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19559,13 +19559,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -19580,13 +19580,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -19597,10 +19597,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -19611,10 +19611,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -19625,13 +19625,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -19641,10 +19641,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -19655,10 +19655,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -19669,10 +19669,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -19685,12 +19685,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -19702,11 +19702,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -19718,11 +19718,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -19733,11 +19733,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -19749,11 +19749,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -19767,13 +19767,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -19784,11 +19784,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -19800,11 +19800,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -19818,13 +19818,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -19835,11 +19835,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -19851,11 +19851,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -19869,13 +19869,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19887,12 +19887,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19905,12 +19905,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19923,12 +19923,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19942,13 +19942,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19960,12 +19960,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19978,12 +19978,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -19996,12 +19996,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20015,13 +20015,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -20036,13 +20036,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -20053,10 +20053,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -20067,10 +20067,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -20081,13 +20081,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -20097,10 +20097,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -20111,10 +20111,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -20125,10 +20125,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -20141,12 +20141,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -20158,11 +20158,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -20174,11 +20174,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -20189,11 +20189,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -20205,11 +20205,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20223,13 +20223,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -20240,11 +20240,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -20256,11 +20256,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20274,13 +20274,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -20291,11 +20291,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -20307,11 +20307,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20325,13 +20325,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20343,12 +20343,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20361,12 +20361,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20379,12 +20379,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20398,13 +20398,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20416,12 +20416,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20434,12 +20434,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20452,12 +20452,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20471,13 +20471,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20492,13 +20492,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -20509,10 +20509,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -20523,10 +20523,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -20537,13 +20537,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -20553,10 +20553,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -20567,10 +20567,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -20581,10 +20581,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -20597,12 +20597,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -20614,11 +20614,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -20630,11 +20630,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -20645,11 +20645,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -20661,11 +20661,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -20679,13 +20679,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -20696,11 +20696,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -20712,11 +20712,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -20730,13 +20730,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -20747,11 +20747,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -20763,11 +20763,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -20781,13 +20781,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20799,12 +20799,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20817,12 +20817,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20835,12 +20835,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20854,13 +20854,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20872,12 +20872,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20890,12 +20890,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20908,12 +20908,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -20927,13 +20927,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -20948,13 +20948,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -20965,10 +20965,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -20979,10 +20979,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -20993,13 +20993,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21009,10 +21009,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21023,10 +21023,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21037,10 +21037,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -21053,12 +21053,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -21070,11 +21070,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -21086,11 +21086,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -21101,11 +21101,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -21117,11 +21117,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21135,13 +21135,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -21152,11 +21152,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -21168,11 +21168,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21186,13 +21186,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -21203,11 +21203,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -21219,11 +21219,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21237,13 +21237,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -21255,12 +21255,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -21273,12 +21273,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -21291,12 +21291,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -21310,13 +21310,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -21328,12 +21328,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -21346,12 +21346,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -21364,12 +21364,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -21383,13 +21383,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21404,13 +21404,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21421,10 +21421,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21435,10 +21435,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21449,13 +21449,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -21465,10 +21465,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -21479,10 +21479,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -21493,10 +21493,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -21509,12 +21509,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -21526,11 +21526,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -21542,11 +21542,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -21557,11 +21557,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -21573,11 +21573,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -21591,13 +21591,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -21608,11 +21608,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -21624,11 +21624,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -21642,13 +21642,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -21659,11 +21659,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -21675,11 +21675,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -21693,13 +21693,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21711,12 +21711,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21729,12 +21729,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21747,12 +21747,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21766,13 +21766,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21784,12 +21784,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21802,12 +21802,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21820,12 +21820,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21839,13 +21839,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -21860,13 +21860,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -21877,10 +21877,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -21891,10 +21891,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -21905,13 +21905,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -21921,10 +21921,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -21935,10 +21935,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -21949,10 +21949,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -21965,12 +21965,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -21982,11 +21982,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -21998,11 +21998,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -22013,11 +22013,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -22029,11 +22029,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22047,13 +22047,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -22064,11 +22064,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -22080,11 +22080,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22098,13 +22098,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -22115,11 +22115,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -22131,11 +22131,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22149,13 +22149,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22167,12 +22167,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22185,12 +22185,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22203,12 +22203,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22222,13 +22222,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22240,12 +22240,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22258,12 +22258,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22276,12 +22276,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22295,13 +22295,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22316,13 +22316,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -22333,10 +22333,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -22347,10 +22347,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -22361,13 +22361,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -22377,10 +22377,10 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -22391,10 +22391,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -22405,10 +22405,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -22421,12 +22421,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -22438,11 +22438,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -22454,11 +22454,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -22469,11 +22469,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -22485,11 +22485,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -22503,13 +22503,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -22520,11 +22520,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -22536,11 +22536,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -22554,13 +22554,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -22571,11 +22571,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -22587,11 +22587,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -22605,13 +22605,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22623,12 +22623,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22641,12 +22641,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22659,12 +22659,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22678,13 +22678,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22696,12 +22696,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22714,12 +22714,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22732,12 +22732,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22751,13 +22751,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -22772,13 +22772,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -22789,10 +22789,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -22803,10 +22803,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -22817,13 +22817,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -22833,10 +22833,10 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -22847,10 +22847,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -22861,10 +22861,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -22877,12 +22877,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -22894,11 +22894,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -22910,11 +22910,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -22925,11 +22925,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -22941,11 +22941,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -22959,13 +22959,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -22976,11 +22976,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -22992,11 +22992,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -23010,13 +23010,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -23027,11 +23027,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -23043,11 +23043,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -23061,13 +23061,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23079,12 +23079,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23097,12 +23097,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23115,12 +23115,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23134,13 +23134,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23152,12 +23152,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23170,12 +23170,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23188,12 +23188,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23207,13 +23207,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -23228,13 +23228,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -23245,10 +23245,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -23259,10 +23259,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -23273,13 +23273,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -23289,10 +23289,10 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -23303,10 +23303,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -23317,10 +23317,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -23333,12 +23333,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -23350,11 +23350,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -23366,11 +23366,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -23381,11 +23381,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -23397,11 +23397,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -23415,13 +23415,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -23432,11 +23432,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -23448,11 +23448,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -23466,13 +23466,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -23483,11 +23483,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -23499,11 +23499,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -23517,13 +23517,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23535,12 +23535,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23553,12 +23553,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23571,12 +23571,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23590,13 +23590,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23608,12 +23608,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23626,12 +23626,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23644,12 +23644,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23663,13 +23663,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -23684,13 +23684,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -23701,10 +23701,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -23715,10 +23715,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -23729,13 +23729,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -23745,10 +23745,10 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -23759,10 +23759,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -23773,10 +23773,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -23789,12 +23789,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -23806,11 +23806,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -23822,11 +23822,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -23837,11 +23837,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -23853,11 +23853,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -23871,13 +23871,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -23888,11 +23888,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -23904,11 +23904,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -23922,13 +23922,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -23939,11 +23939,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -23955,11 +23955,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -23973,13 +23973,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23991,12 +23991,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24009,12 +24009,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24027,12 +24027,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24046,13 +24046,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24064,12 +24064,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24082,12 +24082,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24100,12 +24100,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24119,13 +24119,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -24140,13 +24140,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -24157,10 +24157,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -24171,10 +24171,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -24185,13 +24185,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -24201,10 +24201,10 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -24215,10 +24215,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -24229,10 +24229,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -24245,12 +24245,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -24262,11 +24262,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -24278,11 +24278,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -24293,11 +24293,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -24309,11 +24309,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -24327,13 +24327,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -24344,11 +24344,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -24360,11 +24360,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -24378,13 +24378,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -24395,11 +24395,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -24411,11 +24411,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -24429,13 +24429,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24447,12 +24447,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24465,12 +24465,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24483,12 +24483,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24502,13 +24502,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24520,12 +24520,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24538,12 +24538,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24556,12 +24556,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24575,13 +24575,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -24596,13 +24596,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -24613,10 +24613,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -24627,10 +24627,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -24641,13 +24641,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -24657,10 +24657,10 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -24671,10 +24671,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -24685,10 +24685,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -24701,12 +24701,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -24718,11 +24718,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -24734,11 +24734,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -24749,11 +24749,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -24765,11 +24765,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -24783,13 +24783,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -24800,11 +24800,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -24816,11 +24816,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -24834,13 +24834,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -24851,11 +24851,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -24867,11 +24867,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -24885,13 +24885,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24903,12 +24903,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24921,12 +24921,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24939,12 +24939,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24958,13 +24958,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24976,12 +24976,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24994,12 +24994,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25012,12 +25012,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25031,13 +25031,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -25052,13 +25052,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -25069,10 +25069,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -25083,10 +25083,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -25097,13 +25097,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -25113,10 +25113,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -25127,10 +25127,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -25141,10 +25141,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -25157,12 +25157,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -25174,11 +25174,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -25190,11 +25190,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -25205,11 +25205,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -25221,11 +25221,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -25239,13 +25239,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -25256,11 +25256,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -25272,11 +25272,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -25290,13 +25290,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -25307,11 +25307,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -25323,11 +25323,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -25341,13 +25341,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25359,12 +25359,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25377,12 +25377,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25395,12 +25395,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25414,13 +25414,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25432,12 +25432,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25450,12 +25450,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25468,12 +25468,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25487,13 +25487,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -25508,13 +25508,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -25525,10 +25525,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -25539,10 +25539,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -25553,13 +25553,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -25569,10 +25569,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -25583,10 +25583,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -25597,10 +25597,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -25613,12 +25613,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -25630,11 +25630,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -25646,11 +25646,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -25661,11 +25661,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -25677,11 +25677,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -25695,13 +25695,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -25712,11 +25712,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -25728,11 +25728,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -25746,13 +25746,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -25763,11 +25763,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -25779,11 +25779,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -25797,13 +25797,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25815,12 +25815,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25833,12 +25833,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25851,12 +25851,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25870,13 +25870,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25888,12 +25888,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25906,12 +25906,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25924,12 +25924,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25943,13 +25943,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -25964,13 +25964,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -25981,10 +25981,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -25995,10 +25995,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -26009,13 +26009,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -26025,10 +26025,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -26039,10 +26039,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -26053,10 +26053,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -26069,12 +26069,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -26086,11 +26086,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -26102,11 +26102,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -26117,11 +26117,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -26133,11 +26133,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -26151,13 +26151,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -26168,11 +26168,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -26184,11 +26184,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -26202,13 +26202,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -26219,11 +26219,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -26235,11 +26235,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -26253,13 +26253,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26271,12 +26271,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26289,12 +26289,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26307,12 +26307,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26326,13 +26326,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26344,12 +26344,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26362,12 +26362,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26380,12 +26380,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26399,13 +26399,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -26420,13 +26420,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -26437,10 +26437,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -26451,10 +26451,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -26465,13 +26465,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -26481,10 +26481,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -26495,10 +26495,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -26509,10 +26509,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -26525,12 +26525,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -26542,11 +26542,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -26558,11 +26558,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -26573,11 +26573,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -26589,11 +26589,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -26607,13 +26607,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -26624,11 +26624,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -26640,11 +26640,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -26658,13 +26658,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -26675,11 +26675,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -26691,11 +26691,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -26709,13 +26709,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26727,12 +26727,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26745,12 +26745,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26763,12 +26763,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26782,13 +26782,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26800,12 +26800,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26818,12 +26818,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26836,12 +26836,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26855,13 +26855,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -26876,13 +26876,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -26893,10 +26893,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -26907,10 +26907,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -26921,13 +26921,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -26937,10 +26937,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -26951,10 +26951,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -26965,10 +26965,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -26981,12 +26981,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -26998,11 +26998,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -27014,11 +27014,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -27029,11 +27029,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -27045,11 +27045,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -27063,13 +27063,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -27080,11 +27080,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -27096,11 +27096,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -27114,13 +27114,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -27131,11 +27131,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -27147,11 +27147,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -27165,13 +27165,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27183,12 +27183,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27201,12 +27201,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27219,12 +27219,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27238,13 +27238,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27256,12 +27256,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27274,12 +27274,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27292,12 +27292,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27311,13 +27311,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -27332,13 +27332,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -27349,10 +27349,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -27363,10 +27363,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -27377,13 +27377,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -27393,10 +27393,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -27407,10 +27407,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -27421,10 +27421,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -27437,12 +27437,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -27454,11 +27454,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -27470,11 +27470,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -27485,11 +27485,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -27501,11 +27501,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -27519,13 +27519,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -27536,11 +27536,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -27552,11 +27552,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -27570,13 +27570,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -27587,11 +27587,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -27603,11 +27603,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -27621,13 +27621,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27639,12 +27639,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27657,12 +27657,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27675,12 +27675,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27694,13 +27694,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27712,12 +27712,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27730,12 +27730,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27748,12 +27748,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27767,13 +27767,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -27788,13 +27788,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -27805,10 +27805,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -27819,10 +27819,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -27833,13 +27833,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -27849,10 +27849,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -27863,10 +27863,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -27877,10 +27877,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -27893,12 +27893,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -27910,11 +27910,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -27926,11 +27926,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -27941,11 +27941,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -27957,11 +27957,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -27975,13 +27975,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -27992,11 +27992,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -28008,11 +28008,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -28026,13 +28026,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -28043,11 +28043,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -28059,11 +28059,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -28077,13 +28077,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28095,12 +28095,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28113,12 +28113,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28131,12 +28131,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28150,13 +28150,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28168,12 +28168,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28186,12 +28186,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28204,12 +28204,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28223,13 +28223,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -28244,13 +28244,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -28261,10 +28261,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -28275,10 +28275,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -28289,13 +28289,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -28305,10 +28305,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -28319,10 +28319,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -28333,10 +28333,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -28349,12 +28349,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -28366,11 +28366,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -28382,11 +28382,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -28397,11 +28397,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -28413,11 +28413,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -28431,13 +28431,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -28448,11 +28448,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -28464,11 +28464,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -28482,13 +28482,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -28499,11 +28499,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -28515,11 +28515,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -28533,13 +28533,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28551,12 +28551,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28569,12 +28569,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28587,12 +28587,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28606,13 +28606,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28624,12 +28624,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28642,12 +28642,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28660,12 +28660,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28679,13 +28679,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -28700,13 +28700,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -28717,10 +28717,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -28731,10 +28731,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -28745,13 +28745,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -28761,10 +28761,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -28775,10 +28775,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -28789,10 +28789,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -28805,12 +28805,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -28822,11 +28822,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -28838,11 +28838,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -28853,11 +28853,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -28869,11 +28869,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -28887,13 +28887,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -28904,11 +28904,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -28920,11 +28920,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -28938,13 +28938,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -28955,11 +28955,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -28971,11 +28971,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -28989,13 +28989,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29007,12 +29007,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29025,12 +29025,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29043,12 +29043,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29062,13 +29062,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29080,12 +29080,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29098,12 +29098,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29116,12 +29116,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29135,13 +29135,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -29156,13 +29156,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -29173,10 +29173,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -29187,10 +29187,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -29201,13 +29201,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -29217,10 +29217,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -29231,10 +29231,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -29245,10 +29245,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -29261,12 +29261,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -29278,11 +29278,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -29294,11 +29294,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -29309,11 +29309,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -29325,11 +29325,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -29343,13 +29343,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -29360,11 +29360,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -29376,11 +29376,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -29394,13 +29394,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -29411,11 +29411,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -29427,11 +29427,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -29445,13 +29445,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29463,12 +29463,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29481,12 +29481,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29499,12 +29499,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29518,13 +29518,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29536,12 +29536,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29554,12 +29554,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29572,12 +29572,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29591,13 +29591,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -29612,13 +29612,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -29629,10 +29629,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -29643,10 +29643,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -29657,13 +29657,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -29673,10 +29673,10 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -29687,10 +29687,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -29701,10 +29701,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -29717,12 +29717,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -29734,11 +29734,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -29750,11 +29750,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -29765,11 +29765,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -29781,11 +29781,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -29799,13 +29799,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -29816,11 +29816,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -29832,11 +29832,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -29850,13 +29850,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -29867,11 +29867,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -29883,11 +29883,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -29901,13 +29901,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29919,12 +29919,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29937,12 +29937,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29955,12 +29955,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29974,13 +29974,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29992,12 +29992,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30010,12 +30010,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30028,12 +30028,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30047,13 +30047,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -30068,13 +30068,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -30085,10 +30085,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -30099,10 +30099,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -30113,13 +30113,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -30129,10 +30129,10 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -30143,10 +30143,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -30157,10 +30157,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -30173,12 +30173,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -30190,11 +30190,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -30206,11 +30206,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -30221,11 +30221,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -30237,11 +30237,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -30255,13 +30255,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -30272,11 +30272,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -30288,11 +30288,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -30306,13 +30306,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -30323,11 +30323,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -30339,11 +30339,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -30357,13 +30357,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30375,12 +30375,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30393,12 +30393,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30411,12 +30411,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30430,13 +30430,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30448,12 +30448,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30466,12 +30466,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30484,12 +30484,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30503,13 +30503,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -30524,13 +30524,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -30541,10 +30541,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -30555,10 +30555,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -30569,13 +30569,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -30585,10 +30585,10 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -30599,10 +30599,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -30613,10 +30613,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -30629,12 +30629,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -30646,11 +30646,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -30662,11 +30662,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -30677,11 +30677,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -30693,11 +30693,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -30711,13 +30711,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -30728,11 +30728,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -30744,11 +30744,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -30762,13 +30762,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -30779,11 +30779,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -30795,11 +30795,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -30813,13 +30813,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30831,12 +30831,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30849,12 +30849,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30867,12 +30867,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30886,13 +30886,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30904,12 +30904,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30922,12 +30922,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30940,12 +30940,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30959,13 +30959,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -30980,13 +30980,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -30997,10 +30997,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -31011,10 +31011,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -31025,13 +31025,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -31041,10 +31041,10 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -31055,10 +31055,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -31069,10 +31069,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -31085,12 +31085,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -31102,11 +31102,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -31118,11 +31118,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -31133,11 +31133,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -31149,11 +31149,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -31167,13 +31167,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -31184,11 +31184,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -31200,11 +31200,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -31218,13 +31218,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -31235,11 +31235,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -31251,11 +31251,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -31269,13 +31269,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31287,12 +31287,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31305,12 +31305,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31323,12 +31323,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31342,13 +31342,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31360,12 +31360,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31378,12 +31378,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31396,12 +31396,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31415,13 +31415,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -31436,13 +31436,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -31453,10 +31453,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -31467,10 +31467,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -31481,13 +31481,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -31497,10 +31497,10 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -31511,10 +31511,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -31525,10 +31525,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -31541,12 +31541,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -31558,11 +31558,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -31574,11 +31574,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -31589,11 +31589,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -31605,11 +31605,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -31623,13 +31623,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -31640,11 +31640,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -31656,11 +31656,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -31674,13 +31674,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -31691,11 +31691,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -31707,11 +31707,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -31725,13 +31725,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31743,12 +31743,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31761,12 +31761,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31779,12 +31779,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31798,13 +31798,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31816,12 +31816,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31834,12 +31834,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31852,12 +31852,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31871,13 +31871,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -31892,13 +31892,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -31909,10 +31909,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -31923,10 +31923,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -31937,13 +31937,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -31953,10 +31953,10 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -31967,10 +31967,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -31981,10 +31981,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -31997,12 +31997,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -32014,11 +32014,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -32030,11 +32030,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -32045,11 +32045,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -32061,11 +32061,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -32079,13 +32079,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -32096,11 +32096,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -32112,11 +32112,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -32130,13 +32130,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -32147,11 +32147,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -32163,11 +32163,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -32181,13 +32181,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32199,12 +32199,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32217,12 +32217,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32235,12 +32235,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32254,13 +32254,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32272,12 +32272,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32290,12 +32290,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32308,12 +32308,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32327,13 +32327,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -32348,13 +32348,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -32365,10 +32365,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -32379,10 +32379,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -32393,13 +32393,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -32409,10 +32409,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -32423,10 +32423,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -32437,10 +32437,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -32453,12 +32453,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -32470,11 +32470,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -32486,11 +32486,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -32501,11 +32501,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -32517,11 +32517,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -32535,13 +32535,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -32552,11 +32552,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -32568,11 +32568,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -32586,13 +32586,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -32603,11 +32603,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -32619,11 +32619,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -32637,13 +32637,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32655,12 +32655,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32673,12 +32673,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32691,12 +32691,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32710,13 +32710,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32728,12 +32728,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32746,12 +32746,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32764,12 +32764,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32783,13 +32783,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -32804,13 +32804,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -32821,10 +32821,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -32835,10 +32835,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -32849,13 +32849,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -32865,10 +32865,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -32879,10 +32879,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -32893,10 +32893,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -32909,12 +32909,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -32926,11 +32926,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -32942,11 +32942,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -32957,11 +32957,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -32973,11 +32973,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -32991,13 +32991,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -33008,11 +33008,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -33024,11 +33024,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -33042,13 +33042,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -33059,11 +33059,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -33075,11 +33075,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -33093,13 +33093,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33111,12 +33111,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33129,12 +33129,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33147,12 +33147,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33166,13 +33166,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33184,12 +33184,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33202,12 +33202,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33220,12 +33220,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33239,13 +33239,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -33260,13 +33260,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -33277,10 +33277,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -33291,10 +33291,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -33305,13 +33305,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -33321,10 +33321,10 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -33335,10 +33335,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -33349,10 +33349,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -33365,12 +33365,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -33382,11 +33382,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -33398,11 +33398,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -33413,11 +33413,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -33429,11 +33429,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -33447,13 +33447,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -33464,11 +33464,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -33480,11 +33480,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -33498,13 +33498,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -33515,11 +33515,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -33531,11 +33531,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -33549,13 +33549,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33567,12 +33567,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33585,12 +33585,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33603,12 +33603,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33622,13 +33622,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33640,12 +33640,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33658,12 +33658,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33676,12 +33676,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33695,13 +33695,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -33716,13 +33716,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -33733,10 +33733,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -33747,10 +33747,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -33761,13 +33761,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -33777,10 +33777,10 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -33791,10 +33791,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -33805,10 +33805,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -33821,12 +33821,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -33838,11 +33838,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -33854,11 +33854,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -33869,11 +33869,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -33885,11 +33885,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -33903,13 +33903,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -33920,11 +33920,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -33936,11 +33936,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -33954,13 +33954,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -33971,11 +33971,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -33987,11 +33987,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -34005,13 +34005,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34023,12 +34023,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34041,12 +34041,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34059,12 +34059,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34078,13 +34078,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34096,12 +34096,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34114,12 +34114,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34132,12 +34132,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34151,13 +34151,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -34172,13 +34172,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -34189,10 +34189,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -34203,10 +34203,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -34217,13 +34217,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -34233,10 +34233,10 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -34247,10 +34247,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -34261,10 +34261,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -34277,12 +34277,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -34294,11 +34294,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -34310,11 +34310,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -34325,11 +34325,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -34341,11 +34341,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -34359,13 +34359,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -34376,11 +34376,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -34392,11 +34392,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -34410,13 +34410,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -34427,11 +34427,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -34443,11 +34443,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -34461,13 +34461,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34479,12 +34479,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34497,12 +34497,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34515,12 +34515,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34534,13 +34534,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34552,12 +34552,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34570,12 +34570,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34588,12 +34588,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34607,13 +34607,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -34628,13 +34628,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -34645,10 +34645,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -34659,10 +34659,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -34673,13 +34673,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -34689,10 +34689,10 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -34703,10 +34703,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -34717,10 +34717,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -34733,12 +34733,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -34750,11 +34750,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -34766,11 +34766,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -34781,11 +34781,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -34797,11 +34797,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -34815,13 +34815,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -34832,11 +34832,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -34848,11 +34848,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -34866,13 +34866,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -34883,11 +34883,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -34899,11 +34899,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -34917,13 +34917,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34935,12 +34935,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34953,12 +34953,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34971,12 +34971,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -34990,13 +34990,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35008,12 +35008,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35026,12 +35026,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35044,12 +35044,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35063,13 +35063,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -35084,13 +35084,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -35101,10 +35101,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -35115,10 +35115,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -35129,13 +35129,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -35145,10 +35145,10 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -35159,10 +35159,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -35173,10 +35173,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -35189,12 +35189,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -35206,11 +35206,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -35222,11 +35222,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -35237,11 +35237,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -35253,11 +35253,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -35271,13 +35271,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -35288,11 +35288,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -35304,11 +35304,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -35322,13 +35322,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -35339,11 +35339,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -35355,11 +35355,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -35373,13 +35373,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35391,12 +35391,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35409,12 +35409,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35427,12 +35427,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35446,13 +35446,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35464,12 +35464,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35482,12 +35482,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35500,12 +35500,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35519,13 +35519,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -35540,13 +35540,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -35557,10 +35557,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -35571,10 +35571,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -35585,13 +35585,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -35601,10 +35601,10 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -35615,10 +35615,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -35629,10 +35629,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -35645,12 +35645,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -35662,11 +35662,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -35678,11 +35678,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -35693,11 +35693,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -35709,11 +35709,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -35727,13 +35727,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -35744,11 +35744,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -35760,11 +35760,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -35778,13 +35778,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -35795,11 +35795,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -35811,11 +35811,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -35829,13 +35829,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35847,12 +35847,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35865,12 +35865,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35883,12 +35883,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35902,13 +35902,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35920,12 +35920,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35938,12 +35938,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35956,12 +35956,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -35975,13 +35975,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -35996,13 +35996,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -36013,10 +36013,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -36027,10 +36027,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -36041,13 +36041,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -36057,10 +36057,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -36071,10 +36071,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -36085,10 +36085,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -36101,12 +36101,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -36118,11 +36118,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -36134,11 +36134,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -36149,11 +36149,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -36165,11 +36165,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -36183,13 +36183,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -36200,11 +36200,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -36216,11 +36216,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -36234,13 +36234,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -36251,11 +36251,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -36267,11 +36267,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -36285,13 +36285,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36303,12 +36303,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36321,12 +36321,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36339,12 +36339,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36358,13 +36358,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36376,12 +36376,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36394,12 +36394,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36412,12 +36412,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36431,13 +36431,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -36452,13 +36452,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -36469,10 +36469,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -36483,10 +36483,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C4__NEONDOT, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -36497,13 +36497,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -36513,10 +36513,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -36527,10 +36527,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -36541,10 +36541,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -36557,12 +36557,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -36574,11 +36574,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -36590,11 +36590,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -36605,11 +36605,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -36621,11 +36621,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36639,13 +36639,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -36656,11 +36656,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -36672,11 +36672,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36690,13 +36690,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -36707,11 +36707,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -36723,11 +36723,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36741,13 +36741,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36759,12 +36759,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36777,12 +36777,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36795,12 +36795,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36814,13 +36814,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36832,12 +36832,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36850,12 +36850,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36868,12 +36868,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36887,13 +36887,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36908,13 +36908,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -36925,10 +36925,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -36939,10 +36939,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X8C4__NEONDOT, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -36953,13 +36953,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -36969,10 +36969,10 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -36983,10 +36983,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -36997,10 +36997,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -37013,12 +37013,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -37030,11 +37030,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -37046,11 +37046,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -37061,11 +37061,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -37077,11 +37077,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -37095,13 +37095,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -37112,11 +37112,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -37128,11 +37128,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -37146,13 +37146,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -37163,11 +37163,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -37179,11 +37179,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -37197,13 +37197,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37215,12 +37215,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37233,12 +37233,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37251,12 +37251,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37270,13 +37270,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37288,12 +37288,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37306,12 +37306,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37324,12 +37324,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37343,13 +37343,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -37364,13 +37364,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -37381,10 +37381,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -37395,10 +37395,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X8C4__NEONDOT, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -37409,13 +37409,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
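
For context on the new "gemmlowp" token in these names: the renamed kernels requantize their int32 accumulators with gemmlowp-style fixed-point arithmetic before the qmin/qmax clamp exercised by the tests above. Below is a minimal scalar sketch of that scheme; the function and parameter names are illustrative only (not XNNPACK APIs), and the rounding nudge and saturation corner cases are simplified.

#include <cstdint>

// Illustrative scalar model of gemmlowp-style requantization (simplified):
// acc is the int32 dot-product accumulator, multiplier/shift encode the
// effective output scale, and [qmin, qmax] is the clamp range that the
// qmin(128)/qmax(128) tests above exercise.
static inline int8_t requantize_gemmlowp_sketch(
    int32_t acc, int32_t multiplier, uint32_t shift,
    int32_t output_zero_point, int8_t qmin, int8_t qmax)
{
  // Fixed-point multiply keeping the high 32 bits, with rounding
  // (the VQRDMULH-style step; the sign-dependent nudge and the
  // INT32_MIN * INT32_MIN saturation case are omitted here).
  const int64_t product = (int64_t) acc * (int64_t) multiplier;
  const int32_t q31 = (int32_t) ((product + (INT64_C(1) << 30)) >> 31);
  // Rounding arithmetic right shift by `shift` bits.
  const int32_t mask = (int32_t) ((UINT32_C(1) << shift) - 1);
  const int32_t remainder = q31 & mask;
  const int32_t threshold = (mask >> 1) + (int32_t) (q31 < 0);
  int32_t out = (q31 >> shift) + (int32_t) (remainder > threshold);
  // Re-center on the output zero point, then clamp to the activation range.
  out += output_zero_point;
  if (out < (int32_t) qmin) out = (int32_t) qmin;
  if (out > (int32_t) qmax) out = (int32_t) qmax;
  return (int8_t) out;
}
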
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -37425,10 +37425,10 @@
       .m(8)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -37439,10 +37439,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -37453,10 +37453,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 8; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -37469,12 +37469,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 8; m++) {
       GemmMicrokernelTester()
@@ -37486,11 +37486,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -37502,11 +37502,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -37517,11 +37517,11 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -37533,11 +37533,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -37551,13 +37551,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -37568,11 +37568,11 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -37584,11 +37584,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -37602,13 +37602,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -37619,11 +37619,11 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -37635,11 +37635,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -37653,13 +37653,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37671,12 +37671,12 @@
           .m(8)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37689,12 +37689,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37707,12 +37707,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37726,13 +37726,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37744,12 +37744,12 @@
           .m(8)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37762,12 +37762,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37780,12 +37780,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37799,13 +37799,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -37820,13 +37820,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -37837,10 +37837,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -37851,10 +37851,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X8C4__NEONDOT, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -37865,13 +37865,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -37881,10 +37881,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -37895,10 +37895,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -37909,10 +37909,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -37925,12 +37925,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -37942,11 +37942,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -37958,11 +37958,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -37973,11 +37973,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -37989,11 +37989,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -38007,13 +38007,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38024,11 +38024,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38040,11 +38040,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -38058,13 +38058,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -38075,11 +38075,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -38091,11 +38091,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -38109,13 +38109,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38127,12 +38127,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38145,12 +38145,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38163,12 +38163,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38182,13 +38182,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38200,12 +38200,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38218,12 +38218,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38236,12 +38236,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38255,13 +38255,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -38276,13 +38276,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -38293,10 +38293,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -38307,10 +38307,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__NEONDOT, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -38321,13 +38321,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
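
As an aside on the tile names being renamed here (1X16C4, 4X16C4, ...): MRxNR is the output tile and cKR is the channel-group size along K, which for the __neondot kernels matches the 4-element dot product handled by one SDOT instruction. A naive reference loop for such a tile might look as follows; this is an illustrative sketch, not XNNPACK's packing or kernel code.

#include <cstddef>
#include <cstdint>

// Illustrative reference for an MRxNR output tile with int8 inputs and int32
// accumulation; real microkernels consume packed weights and vectorize the
// inner loop in KR-sized groups (KR = 4 for the c4 kernels in this file).
void reference_qs8_tile(
    size_t mr, size_t nr, size_t kc,
    const int8_t* a, size_t a_stride,   // activations, one row per m
    const int8_t* b, size_t b_stride,   // weights, one column per n (already transposed)
    const int32_t* bias,
    int32_t* acc)                       // mr * nr accumulators, row-major
{
  for (size_t m = 0; m < mr; m++) {
    for (size_t n = 0; n < nr; n++) {
      int32_t sum = bias[n];
      for (size_t k = 0; k < kc; k++) {
        sum += (int32_t) a[m * a_stride + k] * (int32_t) b[n * b_stride + k];
      }
      acc[m * nr + n] = sum;  // requantized to int8 in a separate step
    }
  }
}
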
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -38337,10 +38337,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -38351,10 +38351,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -38365,10 +38365,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -38381,12 +38381,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -38398,11 +38398,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -38414,11 +38414,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -38429,11 +38429,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -38445,11 +38445,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -38463,13 +38463,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38480,11 +38480,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38496,11 +38496,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -38514,13 +38514,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -38531,11 +38531,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -38547,11 +38547,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -38565,13 +38565,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38583,12 +38583,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38601,12 +38601,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38619,12 +38619,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38638,13 +38638,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38656,12 +38656,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38674,12 +38674,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38692,12 +38692,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38711,13 +38711,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -38732,13 +38732,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -38749,10 +38749,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -38763,10 +38763,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__NEONDOT, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -38777,13 +38777,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
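
The edits in this file are purely mechanical: every xnn_qs8_*_minmax_ukernel_* symbol and the matching TEST() prefix gain a gemmlowp token, while the tile shape and ISA suffix stay untouched. A hypothetical helper expressing that mapping (not part of the patch) is sketched below.

#include <string>

// Hypothetical illustration of the rename applied throughout this change:
// "xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot"
//   -> "xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot"
std::string add_requantization_token(const std::string& old_name) {
  const std::string anchor = "_minmax_ukernel_";
  const std::string::size_type pos = old_name.find(anchor);
  if (pos == std::string::npos) {
    return old_name;  // not a minmax microkernel symbol; leave unchanged
  }
  return old_name.substr(0, pos) + "_minmax_gemmlowp_ukernel_" +
         old_name.substr(pos + anchor.size());
}
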
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -38793,10 +38793,10 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -38807,10 +38807,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -38821,10 +38821,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -38837,12 +38837,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -38854,11 +38854,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -38870,11 +38870,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -38885,11 +38885,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -38901,11 +38901,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -38919,13 +38919,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38936,11 +38936,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38952,11 +38952,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -38970,13 +38970,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -38987,11 +38987,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -39003,11 +39003,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -39021,13 +39021,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39039,12 +39039,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39057,12 +39057,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39075,12 +39075,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39094,13 +39094,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39112,12 +39112,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39130,12 +39130,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39148,12 +39148,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39167,13 +39167,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -39188,13 +39188,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -39205,10 +39205,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -39219,10 +39219,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_6X16C4__NEONDOT, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -39233,13 +39233,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -39249,10 +39249,10 @@
       .m(8)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -39263,10 +39263,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -39277,10 +39277,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 8; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -39293,12 +39293,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 8; m++) {
       GemmMicrokernelTester()
@@ -39310,11 +39310,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -39326,11 +39326,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -39341,11 +39341,11 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -39357,11 +39357,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -39375,13 +39375,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -39392,11 +39392,11 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -39408,11 +39408,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -39426,13 +39426,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -39443,11 +39443,11 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -39459,11 +39459,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -39477,13 +39477,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39495,12 +39495,12 @@
           .m(8)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39513,12 +39513,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39531,12 +39531,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39550,13 +39550,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39568,12 +39568,12 @@
           .m(8)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39586,12 +39586,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39604,12 +39604,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39623,13 +39623,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -39644,13 +39644,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -39661,10 +39661,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -39675,10 +39675,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_8X16C4__NEONDOT, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -39689,13 +39689,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -39705,10 +39705,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -39719,10 +39719,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -39733,10 +39733,10 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -39749,12 +39749,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -39766,11 +39766,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -39782,11 +39782,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -39797,11 +39797,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -39813,11 +39813,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -39831,13 +39831,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -39848,11 +39848,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -39864,11 +39864,11 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -39882,13 +39882,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -39899,11 +39899,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -39915,11 +39915,11 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -39933,13 +39933,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -39951,12 +39951,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -39969,12 +39969,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -39987,12 +39987,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -40006,13 +40006,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -40024,12 +40024,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -40042,12 +40042,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -40060,12 +40060,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -40079,13 +40079,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -40100,13 +40100,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -40117,10 +40117,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -40131,10 +40131,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -40145,13 +40145,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40161,10 +40161,10 @@
       .m(1)
       .n(16)
       .k(4)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40175,10 +40175,10 @@
       .n(16)
       .k(4)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40189,10 +40189,10 @@
       .n(16)
       .k(4)
       .a_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -40205,12 +40205,12 @@
           .n(n)
           .k(4)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -40222,11 +40222,11 @@
         .n(16)
         .k(4)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -40238,11 +40238,11 @@
         .n(n)
         .k(4)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 4; k++) {
       GemmMicrokernelTester()
@@ -40253,11 +40253,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 4; k++) {
       GemmMicrokernelTester()
@@ -40269,11 +40269,11 @@
         .n(16)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 4; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -40287,13 +40287,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 5; k < 8; k++) {
       GemmMicrokernelTester()
@@ -40304,11 +40304,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 5; k < 8; k++) {
       GemmMicrokernelTester()
@@ -40320,11 +40320,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 5; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -40338,13 +40338,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 8; k <= 40; k += 4) {
       GemmMicrokernelTester()
@@ -40355,11 +40355,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 8; k <= 40; k += 4) {
       GemmMicrokernelTester()
@@ -40371,11 +40371,11 @@
         .n(16)
         .k(k)
         .a_stride(43)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 8; k <= 40; k += 4) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -40389,13 +40389,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -40407,12 +40407,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -40425,12 +40425,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -40443,12 +40443,12 @@
           .n(n)
           .k(k)
           .a_stride(23)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -40462,13 +40462,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -40480,12 +40480,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -40498,12 +40498,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -40516,12 +40516,12 @@
           .n(n)
           .k(k)
           .a_stride(23)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -40535,13 +40535,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 20; k += 5) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -40556,13 +40556,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40573,10 +40573,10 @@
       .n(16)
       .k(4)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40587,10 +40587,10 @@
       .n(16)
       .k(4)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40601,13 +40601,13 @@
       .n(16)
       .k(4)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40617,10 +40617,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40631,10 +40631,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -40645,10 +40645,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -40661,12 +40661,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -40678,11 +40678,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -40694,11 +40694,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -40709,11 +40709,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -40725,11 +40725,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -40743,13 +40743,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -40760,11 +40760,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -40776,11 +40776,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -40794,13 +40794,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -40811,11 +40811,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -40827,11 +40827,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -40845,13 +40845,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40863,12 +40863,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40881,12 +40881,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40899,12 +40899,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40918,13 +40918,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40936,12 +40936,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40954,12 +40954,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40972,12 +40972,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40991,13 +40991,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -41012,13 +41012,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -41029,10 +41029,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -41043,10 +41043,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -41057,13 +41057,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41073,10 +41073,10 @@
       .m(4)
       .n(16)
       .k(4)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41087,10 +41087,10 @@
       .n(16)
       .k(4)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41101,10 +41101,10 @@
       .n(16)
       .k(4)
       .a_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -41117,12 +41117,12 @@
           .n(n)
           .k(4)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -41134,11 +41134,11 @@
         .n(16)
         .k(4)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -41150,11 +41150,11 @@
         .n(n)
         .k(4)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 4; k++) {
       GemmMicrokernelTester()
@@ -41165,11 +41165,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 4; k++) {
       GemmMicrokernelTester()
@@ -41181,11 +41181,11 @@
         .n(16)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_lt_4_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 4; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41199,13 +41199,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 5; k < 8; k++) {
       GemmMicrokernelTester()
@@ -41216,11 +41216,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 5; k < 8; k++) {
       GemmMicrokernelTester()
@@ -41232,11 +41232,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_gt_4_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 5; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41250,13 +41250,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_div_4) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 8; k <= 40; k += 4) {
       GemmMicrokernelTester()
@@ -41267,11 +41267,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 8; k <= 40; k += 4) {
       GemmMicrokernelTester()
@@ -41283,11 +41283,11 @@
         .n(16)
         .k(k)
         .a_stride(43)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_div_4_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 8; k <= 40; k += 4) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41301,13 +41301,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -41319,12 +41319,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -41337,12 +41337,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -41355,12 +41355,12 @@
           .n(n)
           .k(k)
           .a_stride(23)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -41374,13 +41374,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -41392,12 +41392,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -41410,12 +41410,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -41428,12 +41428,12 @@
           .n(n)
           .k(k)
           .a_stride(23)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 20; k += 5) {
@@ -41447,13 +41447,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 20; k += 5) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41468,13 +41468,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41485,10 +41485,10 @@
       .n(16)
       .k(4)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41499,10 +41499,10 @@
       .n(16)
       .k(4)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41513,13 +41513,13 @@
       .n(16)
       .k(4)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41529,10 +41529,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41543,10 +41543,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41557,10 +41557,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -41573,12 +41573,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -41590,11 +41590,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -41606,11 +41606,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -41621,11 +41621,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -41637,11 +41637,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41655,13 +41655,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -41672,11 +41672,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -41688,11 +41688,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41706,13 +41706,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -41723,11 +41723,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -41739,11 +41739,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41757,13 +41757,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41775,12 +41775,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41793,12 +41793,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41811,12 +41811,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41830,13 +41830,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41848,12 +41848,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41866,12 +41866,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41884,12 +41884,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41903,13 +41903,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41924,13 +41924,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41941,10 +41941,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41955,10 +41955,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41969,13 +41969,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41985,10 +41985,10 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41999,10 +41999,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -42013,10 +42013,10 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -42029,12 +42029,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -42046,11 +42046,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -42062,11 +42062,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -42077,11 +42077,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -42093,11 +42093,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -42111,13 +42111,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -42128,11 +42128,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -42144,11 +42144,11 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -42162,13 +42162,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -42179,11 +42179,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -42195,11 +42195,11 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -42213,13 +42213,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -42231,12 +42231,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -42249,12 +42249,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -42267,12 +42267,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -42286,13 +42286,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -42304,12 +42304,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -42322,12 +42322,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_a) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -42340,12 +42340,12 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -42359,13 +42359,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -42380,13 +42380,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -42397,10 +42397,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -42411,10 +42411,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -42425,13 +42425,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42441,10 +42441,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42455,10 +42455,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42469,10 +42469,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -42485,12 +42485,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -42502,11 +42502,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -42518,11 +42518,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -42533,11 +42533,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -42549,11 +42549,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -42567,13 +42567,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -42584,11 +42584,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -42600,11 +42600,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -42618,13 +42618,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -42635,11 +42635,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -42651,11 +42651,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -42669,13 +42669,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42687,12 +42687,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42705,12 +42705,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42723,12 +42723,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42742,13 +42742,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42760,12 +42760,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42778,12 +42778,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42796,12 +42796,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42815,13 +42815,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -42836,13 +42836,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42853,10 +42853,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42867,10 +42867,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42881,13 +42881,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -42897,10 +42897,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -42911,10 +42911,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -42925,10 +42925,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -42941,12 +42941,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -42958,11 +42958,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -42974,11 +42974,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -42989,11 +42989,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -43005,11 +43005,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -43023,13 +43023,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -43040,11 +43040,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -43056,11 +43056,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -43074,13 +43074,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -43091,11 +43091,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -43107,11 +43107,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -43125,13 +43125,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43143,12 +43143,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43161,12 +43161,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43179,12 +43179,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43198,13 +43198,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43216,12 +43216,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43234,12 +43234,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43252,12 +43252,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43271,13 +43271,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -43292,13 +43292,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -43309,10 +43309,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -43323,10 +43323,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -43337,13 +43337,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43353,10 +43353,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43367,10 +43367,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43381,10 +43381,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -43397,12 +43397,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -43414,11 +43414,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -43430,11 +43430,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -43445,11 +43445,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -43461,11 +43461,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -43479,13 +43479,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -43496,11 +43496,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -43512,11 +43512,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -43530,13 +43530,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -43547,11 +43547,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -43563,11 +43563,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -43581,13 +43581,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43599,12 +43599,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43617,12 +43617,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43635,12 +43635,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43654,13 +43654,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43672,12 +43672,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43690,12 +43690,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43708,12 +43708,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43727,13 +43727,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -43748,13 +43748,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43765,10 +43765,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43779,10 +43779,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43793,13 +43793,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -43809,10 +43809,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -43823,10 +43823,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -43837,10 +43837,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -43853,12 +43853,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -43870,11 +43870,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -43886,11 +43886,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -43901,11 +43901,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -43917,11 +43917,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -43935,13 +43935,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -43952,11 +43952,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -43968,11 +43968,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -43986,13 +43986,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -44003,11 +44003,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -44019,11 +44019,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -44037,13 +44037,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44055,12 +44055,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44073,12 +44073,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44091,12 +44091,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44110,13 +44110,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44128,12 +44128,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44146,12 +44146,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44164,12 +44164,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44183,13 +44183,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -44204,13 +44204,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -44221,10 +44221,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -44235,10 +44235,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -44249,13 +44249,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -44265,10 +44265,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -44279,10 +44279,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -44293,10 +44293,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -44309,12 +44309,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -44326,11 +44326,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -44342,11 +44342,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -44357,11 +44357,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -44373,11 +44373,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -44391,13 +44391,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -44408,11 +44408,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -44424,11 +44424,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -44442,13 +44442,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -44459,11 +44459,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -44475,11 +44475,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -44493,13 +44493,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44511,12 +44511,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44529,12 +44529,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44547,12 +44547,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44566,13 +44566,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44584,12 +44584,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44602,12 +44602,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44620,12 +44620,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44639,13 +44639,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -44660,13 +44660,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -44677,10 +44677,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -44691,10 +44691,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -44705,13 +44705,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -44721,10 +44721,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -44735,10 +44735,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -44749,10 +44749,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -44765,12 +44765,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -44782,11 +44782,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -44798,11 +44798,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -44813,11 +44813,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -44829,11 +44829,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -44847,13 +44847,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -44864,11 +44864,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -44880,11 +44880,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -44898,13 +44898,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -44915,11 +44915,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -44931,11 +44931,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -44949,13 +44949,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44967,12 +44967,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44985,12 +44985,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45003,12 +45003,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45022,13 +45022,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45040,12 +45040,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45058,12 +45058,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45076,12 +45076,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45095,13 +45095,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -45116,13 +45116,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -45133,10 +45133,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -45147,10 +45147,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -45161,13 +45161,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -45177,10 +45177,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -45191,10 +45191,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -45205,10 +45205,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -45221,12 +45221,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -45238,11 +45238,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -45254,11 +45254,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -45269,11 +45269,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -45285,11 +45285,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -45303,13 +45303,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -45320,11 +45320,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -45336,11 +45336,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -45354,13 +45354,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -45371,11 +45371,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -45387,11 +45387,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -45405,13 +45405,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45423,12 +45423,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45441,12 +45441,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45459,12 +45459,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45478,13 +45478,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45496,12 +45496,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45514,12 +45514,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45532,12 +45532,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45551,13 +45551,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -45572,13 +45572,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -45589,10 +45589,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -45603,10 +45603,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -45617,13 +45617,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -45633,10 +45633,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -45647,10 +45647,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -45661,10 +45661,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -45677,12 +45677,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -45694,11 +45694,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -45710,11 +45710,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -45725,11 +45725,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -45741,11 +45741,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -45759,13 +45759,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -45776,11 +45776,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -45792,11 +45792,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -45810,13 +45810,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -45827,11 +45827,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -45843,11 +45843,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -45861,13 +45861,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45879,12 +45879,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45897,12 +45897,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45915,12 +45915,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45934,13 +45934,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45952,12 +45952,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45970,12 +45970,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45988,12 +45988,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46007,13 +46007,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -46028,13 +46028,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -46045,10 +46045,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -46059,10 +46059,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -46073,13 +46073,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -46089,10 +46089,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -46103,10 +46103,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -46117,10 +46117,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -46133,12 +46133,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -46150,11 +46150,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -46166,11 +46166,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -46181,11 +46181,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -46197,11 +46197,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -46215,13 +46215,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -46232,11 +46232,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -46248,11 +46248,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -46266,13 +46266,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -46283,11 +46283,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -46299,11 +46299,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -46317,13 +46317,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46335,12 +46335,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46353,12 +46353,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46371,12 +46371,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46390,13 +46390,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46408,12 +46408,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46426,12 +46426,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46444,12 +46444,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46463,13 +46463,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -46484,13 +46484,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -46501,10 +46501,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -46515,10 +46515,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -46529,13 +46529,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46545,10 +46545,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46559,10 +46559,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46573,10 +46573,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -46589,12 +46589,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -46606,11 +46606,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -46622,11 +46622,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -46637,11 +46637,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -46653,11 +46653,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -46671,13 +46671,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -46688,11 +46688,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -46704,11 +46704,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -46722,13 +46722,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -46739,11 +46739,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -46755,11 +46755,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -46773,13 +46773,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46791,12 +46791,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46809,12 +46809,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46827,12 +46827,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46846,13 +46846,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46864,12 +46864,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46882,12 +46882,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46900,12 +46900,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46919,13 +46919,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -46940,13 +46940,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46957,10 +46957,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46971,10 +46971,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46985,13 +46985,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -47001,10 +47001,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -47015,10 +47015,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -47029,10 +47029,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -47045,12 +47045,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -47062,11 +47062,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -47078,11 +47078,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -47093,11 +47093,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -47109,11 +47109,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -47127,13 +47127,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -47144,11 +47144,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -47160,11 +47160,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -47178,13 +47178,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -47195,11 +47195,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -47211,11 +47211,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -47229,13 +47229,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47247,12 +47247,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47265,12 +47265,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47283,12 +47283,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47302,13 +47302,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47320,12 +47320,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47338,12 +47338,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47356,12 +47356,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47375,13 +47375,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -47396,13 +47396,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -47413,10 +47413,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -47427,10 +47427,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -47441,13 +47441,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47457,10 +47457,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47471,10 +47471,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47485,10 +47485,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -47501,12 +47501,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -47518,11 +47518,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -47534,11 +47534,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -47549,11 +47549,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -47565,11 +47565,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -47583,13 +47583,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -47600,11 +47600,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -47616,11 +47616,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -47634,13 +47634,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -47651,11 +47651,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -47667,11 +47667,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -47685,13 +47685,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47703,12 +47703,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47721,12 +47721,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47739,12 +47739,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47758,13 +47758,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47776,12 +47776,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47794,12 +47794,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47812,12 +47812,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47831,13 +47831,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -47852,13 +47852,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47869,10 +47869,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47883,10 +47883,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47897,13 +47897,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -47913,10 +47913,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -47927,10 +47927,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -47941,10 +47941,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -47957,12 +47957,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -47974,11 +47974,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -47990,11 +47990,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -48005,11 +48005,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -48021,11 +48021,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -48039,13 +48039,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -48056,11 +48056,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -48072,11 +48072,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -48090,13 +48090,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -48107,11 +48107,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -48123,11 +48123,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -48141,13 +48141,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48159,12 +48159,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48177,12 +48177,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48195,12 +48195,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48214,13 +48214,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48232,12 +48232,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48250,12 +48250,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48268,12 +48268,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48287,13 +48287,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -48308,13 +48308,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -48325,10 +48325,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -48339,10 +48339,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -48353,13 +48353,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -48369,10 +48369,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -48383,10 +48383,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -48397,10 +48397,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -48413,12 +48413,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -48430,11 +48430,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -48446,11 +48446,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -48461,11 +48461,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -48477,11 +48477,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -48495,13 +48495,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -48512,11 +48512,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -48528,11 +48528,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -48546,13 +48546,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -48563,11 +48563,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -48579,11 +48579,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -48597,13 +48597,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48615,12 +48615,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48633,12 +48633,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48651,12 +48651,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48670,13 +48670,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48688,12 +48688,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48706,12 +48706,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48724,12 +48724,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48743,13 +48743,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -48764,13 +48764,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -48781,10 +48781,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -48795,10 +48795,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -48809,13 +48809,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -48825,10 +48825,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -48839,10 +48839,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -48853,10 +48853,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -48869,12 +48869,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -48886,11 +48886,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -48902,11 +48902,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -48917,11 +48917,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -48933,11 +48933,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -48951,13 +48951,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -48968,11 +48968,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -48984,11 +48984,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -49002,13 +49002,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -49019,11 +49019,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -49035,11 +49035,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -49053,13 +49053,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49071,12 +49071,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49089,12 +49089,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49107,12 +49107,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49126,13 +49126,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49144,12 +49144,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49162,12 +49162,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49180,12 +49180,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49199,13 +49199,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -49220,13 +49220,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -49237,10 +49237,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -49251,10 +49251,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -49265,13 +49265,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -49281,10 +49281,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -49295,10 +49295,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -49309,10 +49309,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -49325,12 +49325,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -49342,11 +49342,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -49358,11 +49358,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -49373,11 +49373,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -49389,11 +49389,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -49407,13 +49407,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -49424,11 +49424,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -49440,11 +49440,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -49458,13 +49458,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -49475,11 +49475,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -49491,11 +49491,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -49509,13 +49509,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49527,12 +49527,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49545,12 +49545,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49563,12 +49563,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49582,13 +49582,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49600,12 +49600,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49618,12 +49618,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49636,12 +49636,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49655,13 +49655,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -49676,13 +49676,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -49693,10 +49693,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -49707,10 +49707,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -49721,13 +49721,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -49737,10 +49737,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -49751,10 +49751,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -49765,10 +49765,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -49781,12 +49781,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -49798,11 +49798,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -49814,11 +49814,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -49829,11 +49829,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -49845,11 +49845,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -49863,13 +49863,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -49880,11 +49880,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -49896,11 +49896,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -49914,13 +49914,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -49931,11 +49931,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -49947,11 +49947,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -49965,13 +49965,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49983,12 +49983,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50001,12 +50001,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50019,12 +50019,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50038,13 +50038,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50056,12 +50056,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50074,12 +50074,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50092,12 +50092,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50111,13 +50111,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -50132,13 +50132,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -50149,10 +50149,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -50163,10 +50163,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -50177,13 +50177,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -50193,10 +50193,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -50207,10 +50207,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -50221,10 +50221,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -50237,12 +50237,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -50254,11 +50254,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -50270,11 +50270,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -50285,11 +50285,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -50301,11 +50301,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -50319,13 +50319,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -50336,11 +50336,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -50352,11 +50352,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -50370,13 +50370,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -50387,11 +50387,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -50403,11 +50403,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -50421,13 +50421,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50439,12 +50439,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50457,12 +50457,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50475,12 +50475,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50494,13 +50494,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50512,12 +50512,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50530,12 +50530,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50548,12 +50548,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50567,13 +50567,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -50588,13 +50588,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -50605,10 +50605,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -50619,10 +50619,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -50633,13 +50633,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -50649,10 +50649,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -50663,10 +50663,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -50677,10 +50677,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -50693,12 +50693,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -50710,11 +50710,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -50726,11 +50726,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -50741,11 +50741,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -50757,11 +50757,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -50775,13 +50775,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -50792,11 +50792,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -50808,11 +50808,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -50826,13 +50826,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -50843,11 +50843,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -50859,11 +50859,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -50877,13 +50877,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50895,12 +50895,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50913,12 +50913,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50931,12 +50931,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50950,13 +50950,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50968,12 +50968,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50986,12 +50986,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51004,12 +51004,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51023,13 +51023,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -51044,13 +51044,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -51061,10 +51061,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -51075,10 +51075,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -51089,13 +51089,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -51105,10 +51105,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -51119,10 +51119,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -51133,10 +51133,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -51149,12 +51149,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -51166,11 +51166,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -51182,11 +51182,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -51197,11 +51197,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -51213,11 +51213,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -51231,13 +51231,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -51248,11 +51248,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -51264,11 +51264,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -51282,13 +51282,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -51299,11 +51299,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -51315,11 +51315,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -51333,13 +51333,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51351,12 +51351,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51369,12 +51369,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51387,12 +51387,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51406,13 +51406,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51424,12 +51424,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51442,12 +51442,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51460,12 +51460,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51479,13 +51479,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -51500,13 +51500,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -51517,10 +51517,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -51531,10 +51531,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -51545,13 +51545,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -51561,10 +51561,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -51575,10 +51575,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -51589,10 +51589,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -51605,12 +51605,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -51622,11 +51622,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -51638,11 +51638,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -51653,11 +51653,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -51669,11 +51669,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51687,13 +51687,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -51704,11 +51704,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -51720,11 +51720,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51738,13 +51738,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -51755,11 +51755,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -51771,11 +51771,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51789,13 +51789,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51807,12 +51807,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51825,12 +51825,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51843,12 +51843,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51862,13 +51862,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51880,12 +51880,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51898,12 +51898,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51916,12 +51916,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51935,13 +51935,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51956,13 +51956,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -51973,10 +51973,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -51987,10 +51987,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE2_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -52001,13 +52001,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -52017,10 +52017,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -52031,10 +52031,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -52045,10 +52045,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -52061,12 +52061,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -52078,11 +52078,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -52094,11 +52094,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -52109,11 +52109,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -52125,11 +52125,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -52143,13 +52143,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -52160,11 +52160,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -52176,11 +52176,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -52194,13 +52194,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -52211,11 +52211,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -52227,11 +52227,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -52245,13 +52245,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52263,12 +52263,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52281,12 +52281,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52299,12 +52299,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52318,13 +52318,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52336,12 +52336,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52354,12 +52354,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52372,12 +52372,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52391,13 +52391,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -52412,13 +52412,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -52429,10 +52429,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -52443,10 +52443,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE2_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -52457,13 +52457,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -52473,10 +52473,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -52487,10 +52487,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -52501,10 +52501,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -52517,12 +52517,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -52534,11 +52534,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -52550,11 +52550,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -52565,11 +52565,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -52581,11 +52581,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52599,13 +52599,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -52616,11 +52616,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -52632,11 +52632,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52650,13 +52650,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -52667,11 +52667,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -52683,11 +52683,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52701,13 +52701,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52719,12 +52719,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52737,12 +52737,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52755,12 +52755,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52774,13 +52774,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52792,12 +52792,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52810,12 +52810,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52828,12 +52828,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52847,13 +52847,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52868,13 +52868,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -52885,10 +52885,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -52899,10 +52899,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE2_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -52913,13 +52913,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -52929,10 +52929,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -52943,10 +52943,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -52957,10 +52957,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -52973,12 +52973,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -52990,11 +52990,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -53006,11 +53006,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -53021,11 +53021,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -53037,11 +53037,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -53055,13 +53055,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -53072,11 +53072,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -53088,11 +53088,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -53106,13 +53106,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -53123,11 +53123,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -53139,11 +53139,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -53157,13 +53157,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53175,12 +53175,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53193,12 +53193,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53211,12 +53211,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53230,13 +53230,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53248,12 +53248,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53266,12 +53266,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53284,12 +53284,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53303,13 +53303,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -53324,13 +53324,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -53341,10 +53341,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -53355,10 +53355,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE2_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -53369,13 +53369,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -53385,10 +53385,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -53399,10 +53399,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -53413,10 +53413,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -53429,12 +53429,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -53446,11 +53446,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -53462,11 +53462,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -53477,11 +53477,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -53493,11 +53493,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53511,13 +53511,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -53528,11 +53528,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -53544,11 +53544,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53562,13 +53562,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -53579,11 +53579,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -53595,11 +53595,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53613,13 +53613,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53631,12 +53631,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53649,12 +53649,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53667,12 +53667,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53686,13 +53686,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53704,12 +53704,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53722,12 +53722,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53740,12 +53740,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53759,13 +53759,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53780,13 +53780,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -53797,10 +53797,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -53811,10 +53811,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSSE3_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -53825,13 +53825,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -53841,10 +53841,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -53855,10 +53855,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -53869,10 +53869,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -53885,12 +53885,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -53902,11 +53902,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -53918,11 +53918,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -53933,11 +53933,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -53949,11 +53949,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -53967,13 +53967,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -53984,11 +53984,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -54000,11 +54000,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -54018,13 +54018,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54035,11 +54035,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54051,11 +54051,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -54069,13 +54069,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54087,12 +54087,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54105,12 +54105,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54123,12 +54123,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54142,13 +54142,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54160,12 +54160,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54178,12 +54178,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54196,12 +54196,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54215,13 +54215,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -54236,13 +54236,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -54253,10 +54253,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -54267,10 +54267,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSSE3_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -54281,13 +54281,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -54297,10 +54297,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -54311,10 +54311,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -54325,10 +54325,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -54341,12 +54341,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -54358,11 +54358,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -54374,11 +54374,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -54389,11 +54389,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -54405,11 +54405,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -54423,13 +54423,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -54440,11 +54440,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -54456,11 +54456,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -54474,13 +54474,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54491,11 +54491,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54507,11 +54507,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -54525,13 +54525,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54543,12 +54543,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54561,12 +54561,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54579,12 +54579,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54598,13 +54598,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54616,12 +54616,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54634,12 +54634,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54652,12 +54652,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54671,13 +54671,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -54692,13 +54692,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -54709,10 +54709,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -54723,10 +54723,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSSE3_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -54737,13 +54737,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -54753,10 +54753,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -54767,10 +54767,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -54781,10 +54781,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -54797,12 +54797,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -54814,11 +54814,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -54830,11 +54830,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -54845,11 +54845,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -54861,11 +54861,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -54879,13 +54879,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -54896,11 +54896,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -54912,11 +54912,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -54930,13 +54930,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54947,11 +54947,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54963,11 +54963,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -54981,13 +54981,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54999,12 +54999,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55017,12 +55017,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55035,12 +55035,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55054,13 +55054,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55072,12 +55072,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55090,12 +55090,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55108,12 +55108,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55127,13 +55127,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -55148,13 +55148,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -55165,10 +55165,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -55179,10 +55179,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSSE3_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -55193,13 +55193,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -55209,10 +55209,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -55223,10 +55223,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -55237,10 +55237,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -55253,12 +55253,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -55270,11 +55270,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -55286,11 +55286,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -55301,11 +55301,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -55317,11 +55317,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -55335,13 +55335,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -55352,11 +55352,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -55368,11 +55368,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -55386,13 +55386,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -55403,11 +55403,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -55419,11 +55419,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -55437,13 +55437,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55455,12 +55455,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55473,12 +55473,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55491,12 +55491,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55510,13 +55510,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55528,12 +55528,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55546,12 +55546,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55564,12 +55564,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55583,13 +55583,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -55604,13 +55604,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -55621,10 +55621,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -55635,10 +55635,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__SSE41_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -55649,13 +55649,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -55665,10 +55665,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -55679,10 +55679,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -55693,10 +55693,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -55709,12 +55709,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -55726,11 +55726,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -55742,11 +55742,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -55757,11 +55757,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -55773,11 +55773,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -55791,13 +55791,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -55808,11 +55808,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -55824,11 +55824,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -55842,13 +55842,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -55859,11 +55859,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -55875,11 +55875,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -55893,13 +55893,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55911,12 +55911,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55929,12 +55929,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55947,12 +55947,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55966,13 +55966,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55984,12 +55984,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56002,12 +56002,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56020,12 +56020,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56039,13 +56039,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -56060,13 +56060,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -56077,10 +56077,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -56091,10 +56091,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__SSE41_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -56105,13 +56105,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -56121,10 +56121,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -56135,10 +56135,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -56149,10 +56149,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -56165,12 +56165,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -56182,11 +56182,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -56198,11 +56198,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -56213,11 +56213,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -56229,11 +56229,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -56247,13 +56247,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -56264,11 +56264,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -56280,11 +56280,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -56298,13 +56298,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -56315,11 +56315,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -56331,11 +56331,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -56349,13 +56349,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56367,12 +56367,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56385,12 +56385,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56403,12 +56403,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56422,13 +56422,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56440,12 +56440,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56458,12 +56458,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56476,12 +56476,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56495,13 +56495,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -56516,13 +56516,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -56533,10 +56533,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -56547,10 +56547,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__SSE41_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -56561,13 +56561,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -56577,10 +56577,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -56591,10 +56591,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -56605,10 +56605,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -56621,12 +56621,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -56638,11 +56638,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -56654,11 +56654,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -56669,11 +56669,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -56685,11 +56685,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56703,13 +56703,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -56720,11 +56720,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -56736,11 +56736,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56754,13 +56754,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -56771,11 +56771,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -56787,11 +56787,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56805,13 +56805,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56823,12 +56823,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56841,12 +56841,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56859,12 +56859,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56878,13 +56878,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56896,12 +56896,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56914,12 +56914,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56932,12 +56932,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56951,13 +56951,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56972,13 +56972,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -56989,10 +56989,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -57003,10 +57003,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__SSE41_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -57017,13 +57017,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -57033,10 +57033,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -57047,10 +57047,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -57061,10 +57061,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -57077,12 +57077,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -57094,11 +57094,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -57110,11 +57110,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -57125,11 +57125,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -57141,11 +57141,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -57159,13 +57159,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -57176,11 +57176,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -57192,11 +57192,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -57210,13 +57210,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -57227,11 +57227,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -57243,11 +57243,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -57261,13 +57261,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57279,12 +57279,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57297,12 +57297,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57315,12 +57315,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57334,13 +57334,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57352,12 +57352,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57370,12 +57370,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57388,12 +57388,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57407,13 +57407,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -57428,13 +57428,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -57445,10 +57445,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -57459,10 +57459,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__AVX_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -57473,13 +57473,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -57489,10 +57489,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -57503,10 +57503,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -57517,10 +57517,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -57533,12 +57533,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -57550,11 +57550,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -57566,11 +57566,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -57581,11 +57581,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -57597,11 +57597,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57615,13 +57615,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -57632,11 +57632,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -57648,11 +57648,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57666,13 +57666,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -57683,11 +57683,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -57699,11 +57699,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57717,13 +57717,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57735,12 +57735,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57753,12 +57753,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57771,12 +57771,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57790,13 +57790,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57808,12 +57808,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57826,12 +57826,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57844,12 +57844,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57863,13 +57863,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57884,13 +57884,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -57901,10 +57901,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -57915,10 +57915,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__AVX_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -57929,13 +57929,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -57945,10 +57945,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -57959,10 +57959,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -57973,10 +57973,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -57989,12 +57989,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -58006,11 +58006,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -58022,11 +58022,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -58037,11 +58037,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -58053,11 +58053,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -58071,13 +58071,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -58088,11 +58088,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -58104,11 +58104,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -58122,13 +58122,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -58139,11 +58139,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -58155,11 +58155,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -58173,13 +58173,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58191,12 +58191,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58209,12 +58209,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58227,12 +58227,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58246,13 +58246,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58264,12 +58264,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58282,12 +58282,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58300,12 +58300,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58319,13 +58319,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -58340,13 +58340,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -58357,10 +58357,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -58371,10 +58371,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__AVX_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -58385,13 +58385,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -58401,10 +58401,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -58415,10 +58415,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -58429,10 +58429,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -58445,12 +58445,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -58462,11 +58462,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -58478,11 +58478,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -58493,11 +58493,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -58509,11 +58509,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58527,13 +58527,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -58544,11 +58544,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -58560,11 +58560,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58578,13 +58578,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -58595,11 +58595,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -58611,11 +58611,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58629,13 +58629,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58647,12 +58647,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58665,12 +58665,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58683,12 +58683,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58702,13 +58702,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58720,12 +58720,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58738,12 +58738,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58756,12 +58756,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58775,13 +58775,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58796,13 +58796,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -58813,10 +58813,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -58827,10 +58827,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__AVX_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -58841,13 +58841,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -58857,10 +58857,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -58871,10 +58871,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -58885,10 +58885,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -58901,12 +58901,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -58918,11 +58918,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -58934,11 +58934,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -58949,11 +58949,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -58965,11 +58965,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -58983,13 +58983,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -59000,11 +59000,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -59016,11 +59016,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -59034,13 +59034,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -59051,11 +59051,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -59067,11 +59067,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -59085,13 +59085,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59103,12 +59103,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59121,12 +59121,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59139,12 +59139,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59158,13 +59158,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59176,12 +59176,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59194,12 +59194,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59212,12 +59212,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59231,13 +59231,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -59252,13 +59252,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -59269,10 +59269,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -59283,10 +59283,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C2__XOP_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -59297,13 +59297,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -59313,10 +59313,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -59327,10 +59327,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -59341,10 +59341,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -59357,12 +59357,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -59374,11 +59374,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -59390,11 +59390,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -59405,11 +59405,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -59421,11 +59421,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59439,13 +59439,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -59456,11 +59456,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -59472,11 +59472,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59490,13 +59490,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -59507,11 +59507,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -59523,11 +59523,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59541,13 +59541,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59559,12 +59559,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59577,12 +59577,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59595,12 +59595,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59614,13 +59614,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59632,12 +59632,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59650,12 +59650,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59668,12 +59668,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59687,13 +59687,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59708,13 +59708,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -59725,10 +59725,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -59739,10 +59739,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C2__XOP_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -59753,13 +59753,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -59769,10 +59769,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -59783,10 +59783,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -59797,10 +59797,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -59813,12 +59813,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -59830,11 +59830,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -59846,11 +59846,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -59861,11 +59861,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -59877,11 +59877,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -59895,13 +59895,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -59912,11 +59912,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -59928,11 +59928,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -59946,13 +59946,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -59963,11 +59963,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -59979,11 +59979,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -59997,13 +59997,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60015,12 +60015,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60033,12 +60033,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60051,12 +60051,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60070,13 +60070,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60088,12 +60088,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60106,12 +60106,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60124,12 +60124,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60143,13 +60143,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -60164,13 +60164,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -60181,10 +60181,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -60195,10 +60195,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C2__XOP_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -60209,13 +60209,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -60225,10 +60225,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -60239,10 +60239,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -60253,10 +60253,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -60269,12 +60269,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -60286,11 +60286,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -60302,11 +60302,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -60317,11 +60317,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -60333,11 +60333,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -60351,13 +60351,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -60368,11 +60368,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -60384,11 +60384,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -60402,13 +60402,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -60419,11 +60419,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -60435,11 +60435,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -60453,13 +60453,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60471,12 +60471,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60489,12 +60489,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60507,12 +60507,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60526,13 +60526,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60544,12 +60544,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60562,12 +60562,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60580,12 +60580,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60599,13 +60599,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -60620,13 +60620,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -60637,10 +60637,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -60651,10 +60651,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X4C2__XOP_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -60665,13 +60665,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -60682,10 +60682,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -60697,10 +60697,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -60712,10 +60712,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -60729,12 +60729,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -60747,11 +60747,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -60764,11 +60764,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -60780,11 +60780,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -60797,11 +60797,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -60816,13 +60816,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -60834,11 +60834,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -60851,11 +60851,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -60870,13 +60870,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -60888,11 +60888,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -60905,11 +60905,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -60924,13 +60924,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60943,12 +60943,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60962,12 +60962,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60981,12 +60981,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61001,13 +61001,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61020,12 +61020,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61039,12 +61039,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61058,12 +61058,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61078,13 +61078,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61100,13 +61100,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE2, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -61118,13 +61118,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -61135,10 +61135,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -61150,10 +61150,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -61165,10 +61165,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -61182,12 +61182,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -61200,11 +61200,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -61217,11 +61217,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -61233,11 +61233,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -61250,11 +61250,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -61269,13 +61269,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -61287,11 +61287,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -61304,11 +61304,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -61323,13 +61323,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -61341,11 +61341,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -61358,11 +61358,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -61377,13 +61377,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61396,12 +61396,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61415,12 +61415,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61434,12 +61434,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61454,13 +61454,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61473,12 +61473,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61492,12 +61492,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61511,12 +61511,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61531,13 +61531,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -61553,13 +61553,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE2, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -61571,13 +61571,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -61588,10 +61588,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -61603,10 +61603,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -61618,10 +61618,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -61635,12 +61635,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -61653,11 +61653,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -61670,11 +61670,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -61686,11 +61686,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -61703,11 +61703,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61722,13 +61722,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -61740,11 +61740,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -61757,11 +61757,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61776,13 +61776,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -61794,11 +61794,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -61811,11 +61811,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61830,13 +61830,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61849,12 +61849,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61868,12 +61868,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61887,12 +61887,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61907,13 +61907,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61926,12 +61926,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61945,12 +61945,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61964,12 +61964,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61984,13 +61984,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -62006,13 +62006,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSSE3, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62024,13 +62024,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62041,10 +62041,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62056,10 +62056,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62071,10 +62071,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -62088,12 +62088,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -62106,11 +62106,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -62123,11 +62123,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -62139,11 +62139,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -62156,11 +62156,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -62175,13 +62175,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -62193,11 +62193,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -62210,11 +62210,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -62229,13 +62229,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -62247,11 +62247,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -62264,11 +62264,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -62283,13 +62283,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62302,12 +62302,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62321,12 +62321,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62340,12 +62340,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62360,13 +62360,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62379,12 +62379,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62398,12 +62398,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62417,12 +62417,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62437,13 +62437,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -62459,13 +62459,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSSE3, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62477,13 +62477,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62494,10 +62494,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62509,10 +62509,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62524,10 +62524,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -62541,12 +62541,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -62559,11 +62559,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -62576,11 +62576,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -62592,11 +62592,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -62609,11 +62609,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -62628,13 +62628,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -62646,11 +62646,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -62663,11 +62663,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -62682,13 +62682,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -62700,11 +62700,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -62717,11 +62717,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -62736,13 +62736,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62755,12 +62755,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62774,12 +62774,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62793,12 +62793,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62813,13 +62813,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62832,12 +62832,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62851,12 +62851,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62870,12 +62870,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62890,13 +62890,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -62912,13 +62912,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__SSE41, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62930,13 +62930,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62947,10 +62947,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62962,10 +62962,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -62977,10 +62977,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -62994,12 +62994,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -63012,11 +63012,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -63029,11 +63029,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -63045,11 +63045,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -63062,11 +63062,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -63081,13 +63081,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -63099,11 +63099,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -63116,11 +63116,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -63135,13 +63135,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -63153,11 +63153,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -63170,11 +63170,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -63189,13 +63189,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63208,12 +63208,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63227,12 +63227,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63246,12 +63246,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63266,13 +63266,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63285,12 +63285,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63304,12 +63304,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63323,12 +63323,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63343,13 +63343,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -63365,13 +63365,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__SSE41, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -63383,13 +63383,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -63400,10 +63400,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -63415,10 +63415,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -63430,10 +63430,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -63447,12 +63447,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -63465,11 +63465,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -63482,11 +63482,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -63498,11 +63498,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -63515,11 +63515,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -63534,13 +63534,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -63552,11 +63552,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -63569,11 +63569,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -63588,13 +63588,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -63606,11 +63606,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -63623,11 +63623,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -63642,13 +63642,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63661,12 +63661,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63680,12 +63680,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63699,12 +63699,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63719,13 +63719,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63738,12 +63738,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63757,12 +63757,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63776,12 +63776,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63796,13 +63796,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -63818,13 +63818,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__AVX, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -63836,13 +63836,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -63853,10 +63853,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -63868,10 +63868,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -63883,10 +63883,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -63900,12 +63900,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -63918,11 +63918,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -63935,11 +63935,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -63951,11 +63951,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -63968,11 +63968,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -63987,13 +63987,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -64005,11 +64005,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -64022,11 +64022,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -64041,13 +64041,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -64059,11 +64059,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -64076,11 +64076,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -64095,13 +64095,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64114,12 +64114,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64133,12 +64133,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64152,12 +64152,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64172,13 +64172,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64191,12 +64191,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64210,12 +64210,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64229,12 +64229,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64249,13 +64249,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -64271,13 +64271,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__AVX, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -64289,13 +64289,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -64306,10 +64306,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -64321,10 +64321,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -64336,10 +64336,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -64353,12 +64353,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -64371,11 +64371,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -64388,11 +64388,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -64404,11 +64404,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -64421,11 +64421,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64440,13 +64440,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -64458,11 +64458,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -64475,11 +64475,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64494,13 +64494,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -64512,11 +64512,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -64529,11 +64529,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64548,13 +64548,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64567,12 +64567,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64586,12 +64586,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64605,12 +64605,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64625,13 +64625,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64644,12 +64644,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64663,12 +64663,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64682,12 +64682,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64702,13 +64702,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64724,13 +64724,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C2__XOP, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -64742,13 +64742,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -64759,10 +64759,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -64774,10 +64774,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -64789,10 +64789,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -64806,12 +64806,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -64824,11 +64824,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -64841,11 +64841,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -64857,11 +64857,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -64874,11 +64874,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -64893,13 +64893,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -64911,11 +64911,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -64928,11 +64928,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -64947,13 +64947,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -64965,11 +64965,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -64982,11 +64982,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -65001,13 +65001,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65020,12 +65020,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65039,12 +65039,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65058,12 +65058,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65078,13 +65078,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65097,12 +65097,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65116,12 +65116,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65135,12 +65135,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65155,13 +65155,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -65177,13 +65177,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_4X4C2__XOP, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -65195,13 +65195,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65211,10 +65211,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65225,10 +65225,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65239,10 +65239,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -65255,12 +65255,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -65272,11 +65272,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -65288,11 +65288,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -65303,11 +65303,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -65319,11 +65319,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65337,13 +65337,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -65354,11 +65354,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -65370,11 +65370,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65388,13 +65388,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -65405,11 +65405,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -65421,11 +65421,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65439,13 +65439,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65457,12 +65457,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65475,12 +65475,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65493,12 +65493,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65512,13 +65512,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65530,12 +65530,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65548,12 +65548,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65566,12 +65566,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65585,13 +65585,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65606,13 +65606,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65623,10 +65623,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65637,10 +65637,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65651,13 +65651,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -65667,10 +65667,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -65681,10 +65681,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -65695,10 +65695,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -65711,12 +65711,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -65728,11 +65728,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -65744,11 +65744,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -65759,11 +65759,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -65775,11 +65775,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -65793,13 +65793,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -65810,11 +65810,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -65826,11 +65826,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -65844,13 +65844,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -65861,11 +65861,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -65877,11 +65877,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -65895,13 +65895,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65913,12 +65913,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65931,12 +65931,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65949,12 +65949,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65968,13 +65968,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65986,12 +65986,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66004,12 +66004,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66022,12 +66022,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66041,13 +66041,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -66062,13 +66062,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -66079,10 +66079,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -66093,10 +66093,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -66107,13 +66107,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66123,10 +66123,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66137,10 +66137,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66151,10 +66151,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -66167,12 +66167,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -66184,11 +66184,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -66200,11 +66200,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -66215,11 +66215,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -66231,11 +66231,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66249,13 +66249,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -66266,11 +66266,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -66282,11 +66282,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66300,13 +66300,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -66317,11 +66317,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -66333,11 +66333,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66351,13 +66351,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66369,12 +66369,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66387,12 +66387,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66405,12 +66405,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66424,13 +66424,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66442,12 +66442,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66460,12 +66460,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66478,12 +66478,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66497,13 +66497,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66518,13 +66518,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66535,10 +66535,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66549,10 +66549,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66563,13 +66563,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -66579,10 +66579,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -66593,10 +66593,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -66607,10 +66607,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -66623,12 +66623,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -66640,11 +66640,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -66656,11 +66656,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -66671,11 +66671,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -66687,11 +66687,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -66705,13 +66705,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -66722,11 +66722,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -66738,11 +66738,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -66756,13 +66756,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -66773,11 +66773,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -66789,11 +66789,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -66807,13 +66807,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66825,12 +66825,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66843,12 +66843,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66861,12 +66861,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66880,13 +66880,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66898,12 +66898,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66916,12 +66916,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66934,12 +66934,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66953,13 +66953,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -66974,13 +66974,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -66991,10 +66991,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -67005,10 +67005,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -67019,13 +67019,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67035,10 +67035,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67049,10 +67049,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67063,10 +67063,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -67079,12 +67079,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -67096,11 +67096,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -67112,11 +67112,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -67127,11 +67127,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -67143,11 +67143,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67161,13 +67161,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -67178,11 +67178,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -67194,11 +67194,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67212,13 +67212,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -67229,11 +67229,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -67245,11 +67245,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67263,13 +67263,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67281,12 +67281,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67299,12 +67299,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67317,12 +67317,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67336,13 +67336,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67354,12 +67354,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67372,12 +67372,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67390,12 +67390,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67409,13 +67409,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67430,13 +67430,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67447,10 +67447,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67461,10 +67461,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67475,13 +67475,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -67491,10 +67491,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -67505,10 +67505,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -67519,10 +67519,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -67535,12 +67535,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -67552,11 +67552,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -67568,11 +67568,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -67583,11 +67583,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -67599,11 +67599,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -67617,13 +67617,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -67634,11 +67634,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -67650,11 +67650,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -67668,13 +67668,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -67685,11 +67685,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -67701,11 +67701,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -67719,13 +67719,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67737,12 +67737,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67755,12 +67755,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67773,12 +67773,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67792,13 +67792,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67810,12 +67810,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67828,12 +67828,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67846,12 +67846,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67865,13 +67865,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -67886,13 +67886,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -67903,10 +67903,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -67917,10 +67917,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -67931,13 +67931,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -67947,10 +67947,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -67961,10 +67961,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -67975,10 +67975,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -67991,12 +67991,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -68008,11 +68008,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -68024,11 +68024,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -68039,11 +68039,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -68055,11 +68055,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68073,13 +68073,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -68090,11 +68090,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -68106,11 +68106,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68124,13 +68124,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -68141,11 +68141,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -68157,11 +68157,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68175,13 +68175,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68193,12 +68193,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68211,12 +68211,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68229,12 +68229,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68248,13 +68248,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68266,12 +68266,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68284,12 +68284,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68302,12 +68302,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68321,13 +68321,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68342,13 +68342,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -68359,10 +68359,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -68373,10 +68373,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -68387,13 +68387,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -68403,10 +68403,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -68417,10 +68417,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -68431,10 +68431,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -68447,12 +68447,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -68464,11 +68464,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -68480,11 +68480,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -68495,11 +68495,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -68511,11 +68511,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -68529,13 +68529,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -68546,11 +68546,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -68562,11 +68562,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -68580,13 +68580,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -68597,11 +68597,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -68613,11 +68613,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -68631,13 +68631,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68649,12 +68649,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68667,12 +68667,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68685,12 +68685,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68704,13 +68704,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68722,12 +68722,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68740,12 +68740,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68758,12 +68758,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68777,13 +68777,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -68798,13 +68798,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -68815,10 +68815,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -68829,10 +68829,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -68843,13 +68843,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -68859,10 +68859,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -68873,10 +68873,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -68887,10 +68887,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -68903,12 +68903,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -68920,11 +68920,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -68936,11 +68936,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -68951,11 +68951,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -68967,11 +68967,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -68985,13 +68985,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -69002,11 +69002,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -69018,11 +69018,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -69036,13 +69036,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -69053,11 +69053,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -69069,11 +69069,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -69087,13 +69087,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69105,12 +69105,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69123,12 +69123,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69141,12 +69141,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69160,13 +69160,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69178,12 +69178,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69196,12 +69196,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69214,12 +69214,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69233,13 +69233,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -69254,13 +69254,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -69271,10 +69271,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -69285,10 +69285,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -69299,13 +69299,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -69315,10 +69315,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -69329,10 +69329,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -69343,10 +69343,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -69359,12 +69359,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -69376,11 +69376,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -69392,11 +69392,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -69407,11 +69407,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -69423,11 +69423,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -69441,13 +69441,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -69458,11 +69458,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -69474,11 +69474,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -69492,13 +69492,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -69509,11 +69509,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -69525,11 +69525,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -69543,13 +69543,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69561,12 +69561,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69579,12 +69579,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69597,12 +69597,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69616,13 +69616,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69634,12 +69634,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69652,12 +69652,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69670,12 +69670,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69689,13 +69689,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -69710,13 +69710,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -69727,10 +69727,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -69741,10 +69741,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -69755,13 +69755,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -69771,10 +69771,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -69785,10 +69785,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -69799,10 +69799,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -69815,12 +69815,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -69832,11 +69832,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -69848,11 +69848,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -69863,11 +69863,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -69879,11 +69879,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -69897,13 +69897,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -69914,11 +69914,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -69930,11 +69930,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -69948,13 +69948,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -69965,11 +69965,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -69981,11 +69981,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -69999,13 +69999,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70017,12 +70017,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70035,12 +70035,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70053,12 +70053,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70072,13 +70072,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70090,12 +70090,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70108,12 +70108,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70126,12 +70126,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70145,13 +70145,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -70166,13 +70166,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -70183,10 +70183,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -70197,10 +70197,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -70211,13 +70211,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -70227,10 +70227,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -70241,10 +70241,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -70255,10 +70255,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -70271,12 +70271,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -70288,11 +70288,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -70304,11 +70304,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -70319,11 +70319,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -70335,11 +70335,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -70353,13 +70353,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -70370,11 +70370,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -70386,11 +70386,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -70404,13 +70404,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -70421,11 +70421,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -70437,11 +70437,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -70455,13 +70455,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70473,12 +70473,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70491,12 +70491,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70509,12 +70509,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70528,13 +70528,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70546,12 +70546,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70564,12 +70564,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70582,12 +70582,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70601,13 +70601,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -70622,13 +70622,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -70639,10 +70639,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -70653,10 +70653,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -70667,13 +70667,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -70683,10 +70683,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -70697,10 +70697,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -70711,10 +70711,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -70727,12 +70727,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -70744,11 +70744,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -70760,11 +70760,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -70775,11 +70775,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -70791,11 +70791,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -70809,13 +70809,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -70826,11 +70826,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -70842,11 +70842,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -70860,13 +70860,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -70877,11 +70877,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -70893,11 +70893,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -70911,13 +70911,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70929,12 +70929,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70947,12 +70947,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70965,12 +70965,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70984,13 +70984,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71002,12 +71002,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71020,12 +71020,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71038,12 +71038,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71057,13 +71057,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -71078,13 +71078,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -71095,10 +71095,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -71109,10 +71109,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -71123,13 +71123,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -71139,10 +71139,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -71153,10 +71153,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -71167,10 +71167,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -71183,12 +71183,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -71200,11 +71200,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -71216,11 +71216,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -71231,11 +71231,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -71247,11 +71247,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -71265,13 +71265,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -71282,11 +71282,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -71298,11 +71298,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -71316,13 +71316,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -71333,11 +71333,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -71349,11 +71349,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -71367,13 +71367,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71385,12 +71385,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71403,12 +71403,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71421,12 +71421,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71440,13 +71440,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71458,12 +71458,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71476,12 +71476,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71494,12 +71494,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71513,13 +71513,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -71534,13 +71534,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -71551,10 +71551,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -71565,10 +71565,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -71579,13 +71579,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -71595,10 +71595,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -71609,10 +71609,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -71623,10 +71623,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -71639,12 +71639,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -71656,11 +71656,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -71672,11 +71672,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -71687,11 +71687,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -71703,11 +71703,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -71721,13 +71721,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -71738,11 +71738,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -71754,11 +71754,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -71772,13 +71772,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -71789,11 +71789,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -71805,11 +71805,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -71823,13 +71823,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71841,12 +71841,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71859,12 +71859,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71877,12 +71877,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71896,13 +71896,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71914,12 +71914,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71932,12 +71932,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71950,12 +71950,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71969,13 +71969,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -71990,13 +71990,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -72007,10 +72007,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -72021,10 +72021,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -72035,13 +72035,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72051,10 +72051,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72065,10 +72065,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72079,10 +72079,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -72095,12 +72095,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -72112,11 +72112,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -72128,11 +72128,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -72143,11 +72143,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -72159,11 +72159,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72177,13 +72177,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -72194,11 +72194,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -72210,11 +72210,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72228,13 +72228,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -72245,11 +72245,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -72261,11 +72261,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72279,13 +72279,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72297,12 +72297,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72315,12 +72315,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72333,12 +72333,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72352,13 +72352,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72370,12 +72370,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72388,12 +72388,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72406,12 +72406,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72425,13 +72425,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72446,13 +72446,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72463,10 +72463,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72477,10 +72477,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE2_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72491,13 +72491,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -72507,10 +72507,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -72521,10 +72521,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -72535,10 +72535,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -72551,12 +72551,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -72568,11 +72568,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -72584,11 +72584,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -72599,11 +72599,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -72615,11 +72615,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -72633,13 +72633,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -72650,11 +72650,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -72666,11 +72666,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -72684,13 +72684,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -72701,11 +72701,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -72717,11 +72717,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -72735,13 +72735,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72753,12 +72753,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72771,12 +72771,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72789,12 +72789,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72808,13 +72808,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72826,12 +72826,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72844,12 +72844,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72862,12 +72862,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72881,13 +72881,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -72902,13 +72902,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -72919,10 +72919,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -72933,10 +72933,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE2_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -72947,13 +72947,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -72963,10 +72963,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -72977,10 +72977,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -72991,10 +72991,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -73007,12 +73007,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -73024,11 +73024,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -73040,11 +73040,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -73055,11 +73055,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -73071,11 +73071,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73089,13 +73089,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -73106,11 +73106,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -73122,11 +73122,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73140,13 +73140,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -73157,11 +73157,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -73173,11 +73173,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73191,13 +73191,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73209,12 +73209,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73227,12 +73227,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73245,12 +73245,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73264,13 +73264,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73282,12 +73282,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73300,12 +73300,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73318,12 +73318,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73337,13 +73337,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73358,13 +73358,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -73375,10 +73375,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -73389,10 +73389,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE2_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -73403,13 +73403,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -73419,10 +73419,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -73433,10 +73433,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -73447,10 +73447,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -73463,12 +73463,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -73480,11 +73480,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -73496,11 +73496,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -73511,11 +73511,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -73527,11 +73527,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -73545,13 +73545,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -73562,11 +73562,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -73578,11 +73578,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -73596,13 +73596,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -73613,11 +73613,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -73629,11 +73629,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -73647,13 +73647,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73665,12 +73665,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73683,12 +73683,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73701,12 +73701,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73720,13 +73720,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73738,12 +73738,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73756,12 +73756,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73774,12 +73774,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73793,13 +73793,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -73814,13 +73814,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -73831,10 +73831,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -73845,10 +73845,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSSE3_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -73859,13 +73859,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -73875,10 +73875,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -73889,10 +73889,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -73903,10 +73903,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -73919,12 +73919,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -73936,11 +73936,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -73952,11 +73952,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -73967,11 +73967,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -73983,11 +73983,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74001,13 +74001,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -74018,11 +74018,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -74034,11 +74034,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74052,13 +74052,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -74069,11 +74069,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -74085,11 +74085,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74103,13 +74103,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74121,12 +74121,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74139,12 +74139,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74157,12 +74157,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74176,13 +74176,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74194,12 +74194,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74212,12 +74212,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74230,12 +74230,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74249,13 +74249,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74270,13 +74270,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -74287,10 +74287,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -74301,10 +74301,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSSE3_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -74315,13 +74315,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -74331,10 +74331,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -74345,10 +74345,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -74359,10 +74359,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -74375,12 +74375,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -74392,11 +74392,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -74408,11 +74408,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -74423,11 +74423,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -74439,11 +74439,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -74457,13 +74457,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -74474,11 +74474,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -74490,11 +74490,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -74508,13 +74508,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -74525,11 +74525,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -74541,11 +74541,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -74559,13 +74559,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74577,12 +74577,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74595,12 +74595,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74613,12 +74613,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74632,13 +74632,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74650,12 +74650,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74668,12 +74668,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74686,12 +74686,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74705,13 +74705,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -74726,13 +74726,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -74743,10 +74743,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -74757,10 +74757,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSSE3_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -74771,13 +74771,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -74787,10 +74787,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -74801,10 +74801,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -74815,10 +74815,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -74831,12 +74831,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -74848,11 +74848,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -74864,11 +74864,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -74879,11 +74879,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -74895,11 +74895,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -74913,13 +74913,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -74930,11 +74930,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -74946,11 +74946,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -74964,13 +74964,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -74981,11 +74981,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -74997,11 +74997,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -75015,13 +75015,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75033,12 +75033,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75051,12 +75051,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75069,12 +75069,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75088,13 +75088,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75106,12 +75106,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75124,12 +75124,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75142,12 +75142,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75161,13 +75161,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -75182,13 +75182,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -75199,10 +75199,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -75213,10 +75213,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__SSE41_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -75227,13 +75227,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -75243,10 +75243,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -75257,10 +75257,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -75271,10 +75271,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -75287,12 +75287,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -75304,11 +75304,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -75320,11 +75320,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -75335,11 +75335,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -75351,11 +75351,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -75369,13 +75369,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -75386,11 +75386,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -75402,11 +75402,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -75420,13 +75420,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -75437,11 +75437,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -75453,11 +75453,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -75471,13 +75471,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75489,12 +75489,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75507,12 +75507,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75525,12 +75525,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75544,13 +75544,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75562,12 +75562,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75580,12 +75580,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75598,12 +75598,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75617,13 +75617,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -75638,13 +75638,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -75655,10 +75655,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -75669,10 +75669,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__SSE41_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -75683,13 +75683,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -75699,10 +75699,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -75713,10 +75713,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -75727,10 +75727,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -75743,12 +75743,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -75760,11 +75760,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -75776,11 +75776,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -75791,11 +75791,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -75807,11 +75807,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -75825,13 +75825,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -75842,11 +75842,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -75858,11 +75858,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -75876,13 +75876,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -75893,11 +75893,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -75909,11 +75909,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -75927,13 +75927,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75945,12 +75945,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75963,12 +75963,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75981,12 +75981,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76000,13 +76000,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76018,12 +76018,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76036,12 +76036,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76054,12 +76054,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76073,13 +76073,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -76094,13 +76094,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -76111,10 +76111,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -76125,10 +76125,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__SSE41_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -76139,13 +76139,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -76155,10 +76155,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -76169,10 +76169,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -76183,10 +76183,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -76199,12 +76199,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -76216,11 +76216,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -76232,11 +76232,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -76247,11 +76247,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -76263,11 +76263,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -76281,13 +76281,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -76298,11 +76298,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -76314,11 +76314,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -76332,13 +76332,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -76349,11 +76349,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -76365,11 +76365,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -76383,13 +76383,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76401,12 +76401,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76419,12 +76419,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76437,12 +76437,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76456,13 +76456,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76474,12 +76474,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76492,12 +76492,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76510,12 +76510,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76529,13 +76529,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -76550,13 +76550,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -76567,10 +76567,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -76581,10 +76581,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__AVX_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -76595,13 +76595,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -76611,10 +76611,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -76625,10 +76625,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -76639,10 +76639,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -76655,12 +76655,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -76672,11 +76672,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -76688,11 +76688,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -76703,11 +76703,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -76719,11 +76719,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -76737,13 +76737,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -76754,11 +76754,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -76770,11 +76770,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -76788,13 +76788,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -76805,11 +76805,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -76821,11 +76821,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -76839,13 +76839,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76857,12 +76857,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76875,12 +76875,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76893,12 +76893,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76912,13 +76912,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76930,12 +76930,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76948,12 +76948,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76966,12 +76966,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -76985,13 +76985,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -77006,13 +77006,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -77023,10 +77023,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -77037,10 +77037,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__AVX_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -77051,13 +77051,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -77067,10 +77067,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -77081,10 +77081,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -77095,10 +77095,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -77111,12 +77111,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -77128,11 +77128,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -77144,11 +77144,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -77159,11 +77159,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -77175,11 +77175,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -77193,13 +77193,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -77210,11 +77210,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -77226,11 +77226,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -77244,13 +77244,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -77261,11 +77261,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -77277,11 +77277,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -77295,13 +77295,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77313,12 +77313,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77331,12 +77331,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77349,12 +77349,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77368,13 +77368,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77386,12 +77386,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77404,12 +77404,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77422,12 +77422,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77441,13 +77441,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -77462,13 +77462,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -77479,10 +77479,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -77493,10 +77493,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__AVX_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -77507,13 +77507,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -77523,10 +77523,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -77537,10 +77537,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -77551,10 +77551,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -77567,12 +77567,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -77584,11 +77584,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -77600,11 +77600,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -77615,11 +77615,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -77631,11 +77631,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -77649,13 +77649,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -77666,11 +77666,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -77682,11 +77682,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -77700,13 +77700,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -77717,11 +77717,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -77733,11 +77733,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -77751,13 +77751,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77769,12 +77769,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77787,12 +77787,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77805,12 +77805,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77824,13 +77824,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77842,12 +77842,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77860,12 +77860,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77878,12 +77878,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -77897,13 +77897,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -77918,13 +77918,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -77935,10 +77935,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -77949,10 +77949,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__XOP_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -77963,13 +77963,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -77979,10 +77979,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -77993,10 +77993,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -78007,10 +78007,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -78023,12 +78023,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -78040,11 +78040,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -78056,11 +78056,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -78071,11 +78071,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -78087,11 +78087,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -78105,13 +78105,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -78122,11 +78122,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -78138,11 +78138,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -78156,13 +78156,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -78173,11 +78173,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -78189,11 +78189,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -78207,13 +78207,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78225,12 +78225,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78243,12 +78243,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78261,12 +78261,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78280,13 +78280,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78298,12 +78298,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78316,12 +78316,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78334,12 +78334,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78353,13 +78353,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -78374,13 +78374,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -78391,10 +78391,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -78405,10 +78405,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__XOP_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -78419,13 +78419,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -78435,10 +78435,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -78449,10 +78449,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -78463,10 +78463,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -78479,12 +78479,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -78496,11 +78496,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -78512,11 +78512,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -78527,11 +78527,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -78543,11 +78543,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -78561,13 +78561,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -78578,11 +78578,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -78594,11 +78594,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -78612,13 +78612,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -78629,11 +78629,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -78645,11 +78645,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -78663,13 +78663,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78681,12 +78681,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78699,12 +78699,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78717,12 +78717,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78736,13 +78736,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78754,12 +78754,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78772,12 +78772,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78790,12 +78790,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -78809,13 +78809,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -78830,13 +78830,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -78847,10 +78847,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -78861,10 +78861,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__XOP_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -78875,13 +78875,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -78892,10 +78892,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -78907,10 +78907,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -78922,10 +78922,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -78939,12 +78939,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -78957,11 +78957,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -78974,11 +78974,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -78990,11 +78990,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -79007,11 +79007,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -79026,13 +79026,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -79044,11 +79044,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -79061,11 +79061,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -79080,13 +79080,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -79098,11 +79098,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -79115,11 +79115,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -79134,13 +79134,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79153,12 +79153,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79172,12 +79172,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79191,12 +79191,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79211,13 +79211,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79230,12 +79230,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79249,12 +79249,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79268,12 +79268,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79288,13 +79288,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -79310,13 +79310,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE2, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -79328,13 +79328,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -79345,10 +79345,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -79360,10 +79360,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -79375,10 +79375,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -79392,12 +79392,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -79410,11 +79410,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -79427,11 +79427,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -79443,11 +79443,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -79460,11 +79460,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -79479,13 +79479,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -79497,11 +79497,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -79514,11 +79514,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -79533,13 +79533,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -79551,11 +79551,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -79568,11 +79568,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -79587,13 +79587,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79606,12 +79606,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79625,12 +79625,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79644,12 +79644,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79664,13 +79664,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79683,12 +79683,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79702,12 +79702,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79721,12 +79721,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -79741,13 +79741,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -79763,13 +79763,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE2, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -79781,13 +79781,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -79798,10 +79798,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -79813,10 +79813,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -79828,10 +79828,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -79845,12 +79845,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -79863,11 +79863,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -79880,11 +79880,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -79896,11 +79896,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -79913,11 +79913,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -79932,13 +79932,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -79950,11 +79950,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -79967,11 +79967,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -79986,13 +79986,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -80004,11 +80004,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -80021,11 +80021,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -80040,13 +80040,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80059,12 +80059,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80078,12 +80078,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80097,12 +80097,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80117,13 +80117,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80136,12 +80136,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80155,12 +80155,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80174,12 +80174,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80194,13 +80194,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -80216,13 +80216,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE2, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -80234,13 +80234,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -80251,10 +80251,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -80266,10 +80266,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -80281,10 +80281,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80298,12 +80298,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -80316,11 +80316,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -80333,11 +80333,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -80349,11 +80349,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -80366,11 +80366,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -80385,13 +80385,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -80403,11 +80403,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -80420,11 +80420,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -80439,13 +80439,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -80457,11 +80457,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -80474,11 +80474,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -80493,13 +80493,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80512,12 +80512,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80531,12 +80531,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80550,12 +80550,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80570,13 +80570,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80589,12 +80589,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80608,12 +80608,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80627,12 +80627,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80647,13 +80647,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -80669,13 +80669,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSSE3, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -80687,13 +80687,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -80704,10 +80704,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -80719,10 +80719,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -80734,10 +80734,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80751,12 +80751,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -80769,11 +80769,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -80786,11 +80786,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -80802,11 +80802,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -80819,11 +80819,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -80838,13 +80838,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -80856,11 +80856,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -80873,11 +80873,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -80892,13 +80892,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -80910,11 +80910,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -80927,11 +80927,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -80946,13 +80946,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80965,12 +80965,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -80984,12 +80984,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81003,12 +81003,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81023,13 +81023,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81042,12 +81042,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81061,12 +81061,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81080,12 +81080,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81100,13 +81100,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -81122,13 +81122,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSSE3, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -81140,13 +81140,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -81157,10 +81157,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -81172,10 +81172,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -81187,10 +81187,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -81204,12 +81204,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -81222,11 +81222,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -81239,11 +81239,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -81255,11 +81255,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -81272,11 +81272,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -81291,13 +81291,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -81309,11 +81309,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -81326,11 +81326,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -81345,13 +81345,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -81363,11 +81363,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -81380,11 +81380,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -81399,13 +81399,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81418,12 +81418,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81437,12 +81437,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81456,12 +81456,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81476,13 +81476,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81495,12 +81495,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81514,12 +81514,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81533,12 +81533,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81553,13 +81553,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -81575,13 +81575,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSSE3, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -81593,13 +81593,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -81610,10 +81610,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -81625,10 +81625,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -81640,10 +81640,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -81657,12 +81657,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -81675,11 +81675,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -81692,11 +81692,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -81708,11 +81708,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -81725,11 +81725,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -81744,13 +81744,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -81762,11 +81762,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -81779,11 +81779,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -81798,13 +81798,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -81816,11 +81816,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -81833,11 +81833,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -81852,13 +81852,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81871,12 +81871,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81890,12 +81890,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81909,12 +81909,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81929,13 +81929,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81948,12 +81948,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81967,12 +81967,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -81986,12 +81986,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82006,13 +82006,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -82028,13 +82028,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__SSE41, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82046,13 +82046,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82063,10 +82063,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82078,10 +82078,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82093,10 +82093,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -82110,12 +82110,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -82128,11 +82128,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -82145,11 +82145,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -82161,11 +82161,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -82178,11 +82178,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -82197,13 +82197,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -82215,11 +82215,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -82232,11 +82232,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -82251,13 +82251,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -82269,11 +82269,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -82286,11 +82286,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -82305,13 +82305,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82324,12 +82324,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82343,12 +82343,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82362,12 +82362,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82382,13 +82382,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82401,12 +82401,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82420,12 +82420,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82439,12 +82439,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82459,13 +82459,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -82481,13 +82481,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__SSE41, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82499,13 +82499,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82516,10 +82516,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82531,10 +82531,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82546,10 +82546,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -82563,12 +82563,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -82581,11 +82581,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -82598,11 +82598,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -82614,11 +82614,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -82631,11 +82631,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -82650,13 +82650,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -82668,11 +82668,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -82685,11 +82685,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -82704,13 +82704,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -82722,11 +82722,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_div_8_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -82739,11 +82739,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -82758,13 +82758,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82777,12 +82777,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82796,12 +82796,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82815,12 +82815,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82835,13 +82835,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82854,12 +82854,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82873,12 +82873,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, n_div_4_strided_a) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82892,12 +82892,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -82912,13 +82912,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -82934,13 +82934,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__SSE41, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82952,13 +82952,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82969,10 +82969,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82984,10 +82984,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -82999,10 +82999,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -83016,12 +83016,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -83034,11 +83034,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -83051,11 +83051,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -83067,11 +83067,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -83084,11 +83084,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -83103,13 +83103,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -83121,11 +83121,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -83138,11 +83138,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -83157,13 +83157,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -83175,11 +83175,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -83192,11 +83192,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -83211,13 +83211,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83230,12 +83230,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83249,12 +83249,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83268,12 +83268,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83288,13 +83288,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83307,12 +83307,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83326,12 +83326,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83345,12 +83345,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83365,13 +83365,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -83387,13 +83387,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__AVX, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -83405,13 +83405,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -83422,10 +83422,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -83437,10 +83437,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -83452,10 +83452,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -83469,12 +83469,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -83487,11 +83487,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -83504,11 +83504,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -83520,11 +83520,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -83537,11 +83537,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -83556,13 +83556,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -83574,11 +83574,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -83591,11 +83591,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -83610,13 +83610,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -83628,11 +83628,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -83645,11 +83645,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -83664,13 +83664,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83683,12 +83683,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83702,12 +83702,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83721,12 +83721,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83741,13 +83741,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83760,12 +83760,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83779,12 +83779,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83798,12 +83798,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -83818,13 +83818,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -83840,13 +83840,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__AVX, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -83858,13 +83858,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -83875,10 +83875,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -83890,10 +83890,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -83905,10 +83905,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -83922,12 +83922,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -83940,11 +83940,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -83957,11 +83957,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -83973,11 +83973,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -83990,11 +83990,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -84009,13 +84009,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -84027,11 +84027,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -84044,11 +84044,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -84063,13 +84063,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -84081,11 +84081,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -84098,11 +84098,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -84117,13 +84117,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84136,12 +84136,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84155,12 +84155,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84174,12 +84174,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84194,13 +84194,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84213,12 +84213,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84232,12 +84232,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, n_div_4_strided_a) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84251,12 +84251,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84271,13 +84271,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -84293,13 +84293,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__AVX, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -84311,13 +84311,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -84328,10 +84328,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -84343,10 +84343,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -84358,10 +84358,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -84375,12 +84375,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -84393,11 +84393,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -84410,11 +84410,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -84426,11 +84426,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -84443,11 +84443,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -84462,13 +84462,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -84480,11 +84480,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -84497,11 +84497,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -84516,13 +84516,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -84534,11 +84534,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -84551,11 +84551,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -84570,13 +84570,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84589,12 +84589,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84608,12 +84608,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84627,12 +84627,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84647,13 +84647,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84666,12 +84666,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84685,12 +84685,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84704,12 +84704,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -84724,13 +84724,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -84746,13 +84746,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__XOP, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -84764,13 +84764,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -84781,10 +84781,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -84796,10 +84796,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -84811,10 +84811,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -84828,12 +84828,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -84846,11 +84846,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -84863,11 +84863,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -84879,11 +84879,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -84896,11 +84896,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -84915,13 +84915,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -84933,11 +84933,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -84950,11 +84950,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -84969,13 +84969,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -84987,11 +84987,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -85004,11 +85004,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -85023,13 +85023,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85042,12 +85042,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85061,12 +85061,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85080,12 +85080,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85100,13 +85100,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85119,12 +85119,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85138,12 +85138,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85157,12 +85157,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85177,13 +85177,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -85199,13 +85199,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__XOP, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -85217,13 +85217,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -85234,10 +85234,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -85249,10 +85249,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -85264,10 +85264,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -85281,12 +85281,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -85299,11 +85299,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -85316,11 +85316,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -85332,11 +85332,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -85349,11 +85349,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -85368,13 +85368,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -85386,11 +85386,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -85403,11 +85403,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -85422,13 +85422,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -85440,11 +85440,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_div_8_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -85457,11 +85457,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -85476,13 +85476,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85495,12 +85495,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85514,12 +85514,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, n_gt_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85533,12 +85533,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85553,13 +85553,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85572,12 +85572,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85591,12 +85591,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, n_div_4_strided_a) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85610,12 +85610,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85630,13 +85630,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -85652,13 +85652,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__XOP, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -85670,13 +85670,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -85686,10 +85686,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -85700,10 +85700,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -85714,10 +85714,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -85730,12 +85730,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -85747,11 +85747,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -85763,11 +85763,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -85778,11 +85778,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -85794,11 +85794,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -85812,13 +85812,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -85829,11 +85829,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -85845,11 +85845,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -85863,13 +85863,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -85880,11 +85880,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -85896,11 +85896,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -85914,13 +85914,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85932,12 +85932,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85950,12 +85950,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85968,12 +85968,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -85987,13 +85987,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86005,12 +86005,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86023,12 +86023,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86041,12 +86041,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86060,13 +86060,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -86081,13 +86081,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, qmin) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -86098,10 +86098,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, qmax) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -86112,10 +86112,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X8C8__AVX2, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -86126,13 +86126,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -86142,10 +86142,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -86156,10 +86156,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -86170,10 +86170,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -86186,12 +86186,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -86203,11 +86203,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -86219,11 +86219,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -86234,11 +86234,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -86250,11 +86250,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -86268,13 +86268,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -86285,11 +86285,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -86301,11 +86301,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -86319,13 +86319,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -86336,11 +86336,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -86352,11 +86352,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -86370,13 +86370,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86388,12 +86388,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86406,12 +86406,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86424,12 +86424,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86443,13 +86443,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86461,12 +86461,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86479,12 +86479,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86497,12 +86497,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86516,13 +86516,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -86537,13 +86537,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, qmin) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -86554,10 +86554,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, qmax) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -86568,10 +86568,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X8C8__AVX2, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -86582,13 +86582,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -86598,10 +86598,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -86612,10 +86612,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -86626,10 +86626,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -86642,12 +86642,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -86659,11 +86659,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -86675,11 +86675,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -86690,11 +86690,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -86706,11 +86706,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -86724,13 +86724,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -86741,11 +86741,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -86757,11 +86757,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -86775,13 +86775,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -86792,11 +86792,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -86808,11 +86808,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -86826,13 +86826,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, n_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86844,12 +86844,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86862,12 +86862,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86880,12 +86880,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86899,13 +86899,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, n_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86917,12 +86917,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86935,12 +86935,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, n_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86953,12 +86953,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -86972,13 +86972,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -86993,13 +86993,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, qmin) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -87010,10 +87010,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, qmax) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -87024,10 +87024,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X8C8__AVX2, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -87038,13 +87038,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87055,10 +87055,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87070,10 +87070,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87085,10 +87085,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -87102,12 +87102,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -87120,11 +87120,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -87137,11 +87137,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -87153,11 +87153,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -87170,11 +87170,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -87189,13 +87189,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -87207,11 +87207,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -87224,11 +87224,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -87243,13 +87243,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -87261,11 +87261,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -87278,11 +87278,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -87297,13 +87297,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, n_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87316,12 +87316,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87335,12 +87335,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87354,12 +87354,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87374,13 +87374,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, n_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87393,12 +87393,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87412,12 +87412,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, n_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87431,12 +87431,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87451,13 +87451,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -87473,13 +87473,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X8C8__AVX2, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87491,13 +87491,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87508,10 +87508,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87523,10 +87523,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87538,10 +87538,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -87555,12 +87555,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -87573,11 +87573,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -87590,11 +87590,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -87606,11 +87606,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -87623,11 +87623,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -87642,13 +87642,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -87660,11 +87660,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -87677,11 +87677,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -87696,13 +87696,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -87714,11 +87714,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -87731,11 +87731,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -87750,13 +87750,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, n_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87769,12 +87769,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87788,12 +87788,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87807,12 +87807,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87827,13 +87827,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, n_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87846,12 +87846,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87865,12 +87865,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, n_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87884,12 +87884,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -87904,13 +87904,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -87926,13 +87926,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X8C8__AVX2, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87944,13 +87944,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87961,10 +87961,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87976,10 +87976,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -87991,10 +87991,10 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -88008,12 +88008,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -88026,11 +88026,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -88043,11 +88043,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -88059,11 +88059,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -88076,11 +88076,11 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -88095,13 +88095,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -88113,11 +88113,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -88130,11 +88130,11 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -88149,13 +88149,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -88167,11 +88167,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -88184,11 +88184,11 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -88203,13 +88203,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, n_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88222,12 +88222,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88241,12 +88241,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, n_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88260,12 +88260,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88280,13 +88280,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, n_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88299,12 +88299,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88318,12 +88318,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, n_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88337,12 +88337,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88357,13 +88357,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -88379,13 +88379,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X8C8__AVX2, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .extended_weights(true)
@@ -88397,13 +88397,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -88413,10 +88413,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -88427,10 +88427,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -88441,10 +88441,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -88457,12 +88457,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -88474,11 +88474,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -88490,11 +88490,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_lt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -88505,11 +88505,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -88521,11 +88521,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -88539,13 +88539,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_gt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -88556,11 +88556,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -88572,11 +88572,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -88590,13 +88590,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_div_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -88607,11 +88607,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -88623,11 +88623,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -88641,13 +88641,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_gt_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88659,12 +88659,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88677,12 +88677,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_gt_16_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88695,12 +88695,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_gt_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88714,13 +88714,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_div_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88732,12 +88732,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_div_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88750,12 +88750,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_div_16_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88768,12 +88768,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_div_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -88787,13 +88787,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -88808,13 +88808,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, qmin) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -88825,10 +88825,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, qmax) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -88839,10 +88839,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X16C8__AVX512SKX, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cm) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -88853,13 +88853,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -88869,10 +88869,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -88883,10 +88883,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -88897,10 +88897,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -88913,12 +88913,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -88930,11 +88930,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -88946,11 +88946,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_lt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -88961,11 +88961,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -88977,11 +88977,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -88995,13 +88995,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_gt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -89012,11 +89012,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -89028,11 +89028,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -89046,13 +89046,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_div_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -89063,11 +89063,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -89079,11 +89079,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -89097,13 +89097,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_gt_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89115,12 +89115,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89133,12 +89133,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_gt_16_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89151,12 +89151,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_gt_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89170,13 +89170,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_div_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89188,12 +89188,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_div_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89206,12 +89206,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_div_16_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89224,12 +89224,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_div_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89243,13 +89243,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -89264,13 +89264,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, qmin) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -89281,10 +89281,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, qmax) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -89295,10 +89295,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X16C8__AVX512SKX, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cm) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -89309,13 +89309,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -89325,10 +89325,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -89339,10 +89339,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -89353,10 +89353,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -89369,12 +89369,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -89386,11 +89386,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -89402,11 +89402,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_lt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -89417,11 +89417,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -89433,11 +89433,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -89451,13 +89451,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_gt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -89468,11 +89468,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -89484,11 +89484,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -89502,13 +89502,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_div_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -89519,11 +89519,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -89535,11 +89535,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -89553,13 +89553,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_gt_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89571,12 +89571,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89589,12 +89589,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_gt_16_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89607,12 +89607,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_gt_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89626,13 +89626,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_div_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89644,12 +89644,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_div_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89662,12 +89662,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_div_16_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89680,12 +89680,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_div_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -89699,13 +89699,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -89720,13 +89720,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, qmin) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -89737,10 +89737,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, qmax) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -89751,10 +89751,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X16C8__AVX512SKX, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cm) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -89765,13 +89765,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -89781,10 +89781,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -89795,10 +89795,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -89809,10 +89809,10 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -89825,12 +89825,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -89842,11 +89842,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -89858,11 +89858,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_lt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -89873,11 +89873,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_lt_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -89889,11 +89889,11 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -89907,13 +89907,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_gt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -89924,11 +89924,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_gt_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -89940,11 +89940,11 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -89958,13 +89958,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_div_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -89975,11 +89975,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_div_8_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -89991,11 +89991,11 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -90009,13 +90009,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, n_gt_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_gt_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -90027,12 +90027,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -90045,12 +90045,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, n_gt_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_gt_16_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -90063,12 +90063,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, n_gt_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_gt_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -90082,13 +90082,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, n_div_16) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_div_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -90100,12 +90100,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, n_div_16_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_div_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -90118,12 +90118,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, n_div_16_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_div_16_strided_a) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -90136,12 +90136,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, n_div_16_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_div_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -90155,13 +90155,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -90176,13 +90176,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, qmin) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -90193,10 +90193,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, qmax) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -90207,10 +90207,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_4X16C8__AVX512SKX, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cm) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -90221,13 +90221,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -90236,10 +90236,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cn) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -90249,10 +90249,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -90262,10 +90262,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -90277,12 +90277,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -90293,11 +90293,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -90308,11 +90308,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -90322,11 +90322,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -90337,11 +90337,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -90354,13 +90354,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -90370,11 +90370,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -90385,11 +90385,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -90402,13 +90402,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(1)
@@ -90418,11 +90418,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(1)
@@ -90433,11 +90433,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -90450,13 +90450,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90467,12 +90467,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90484,12 +90484,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90501,12 +90501,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -90519,13 +90519,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90536,12 +90536,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90553,12 +90553,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90570,12 +90570,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -90588,13 +90588,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -90608,13 +90608,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, qmin) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -90624,10 +90624,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, qmax) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -90637,10 +90637,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cm) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -90650,13 +90650,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -90665,10 +90665,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cn) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -90678,10 +90678,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -90691,10 +90691,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -90706,12 +90706,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -90722,11 +90722,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -90737,11 +90737,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -90751,11 +90751,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -90766,11 +90766,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -90783,13 +90783,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -90799,11 +90799,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -90814,11 +90814,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -90831,13 +90831,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(2)
@@ -90847,11 +90847,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(2)
@@ -90862,11 +90862,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -90879,13 +90879,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90896,12 +90896,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90913,12 +90913,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90930,12 +90930,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -90948,13 +90948,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90965,12 +90965,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90982,12 +90982,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -90999,12 +90999,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -91017,13 +91017,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91037,13 +91037,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, qmin) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -91053,10 +91053,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, qmax) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -91066,10 +91066,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cm) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -91079,13 +91079,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -91094,10 +91094,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cn) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -91107,10 +91107,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -91120,10 +91120,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -91135,12 +91135,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -91151,11 +91151,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -91166,11 +91166,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -91180,11 +91180,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -91195,11 +91195,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91212,13 +91212,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -91228,11 +91228,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -91243,11 +91243,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91260,13 +91260,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(3)
@@ -91276,11 +91276,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(3)
@@ -91291,11 +91291,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91308,13 +91308,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91325,12 +91325,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91342,12 +91342,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91359,12 +91359,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -91377,13 +91377,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91394,12 +91394,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91411,12 +91411,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91428,12 +91428,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -91446,13 +91446,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91466,13 +91466,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, qmin) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -91482,10 +91482,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, qmax) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -91495,10 +91495,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD64, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cm) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -91508,13 +91508,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -91523,10 +91523,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cn) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -91536,10 +91536,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -91549,10 +91549,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -91564,12 +91564,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -91580,11 +91580,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -91595,11 +91595,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -91609,11 +91609,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -91624,11 +91624,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91641,13 +91641,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -91657,11 +91657,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -91672,11 +91672,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91689,13 +91689,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(1)
@@ -91705,11 +91705,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(1)
@@ -91720,11 +91720,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91737,13 +91737,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91754,12 +91754,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91771,12 +91771,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91788,12 +91788,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -91806,13 +91806,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91823,12 +91823,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91840,12 +91840,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -91857,12 +91857,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -91875,13 +91875,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -91895,13 +91895,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, qmin) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -91911,10 +91911,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, qmax) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -91924,10 +91924,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_1X4C8__WASMSIMD_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cm) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -91937,13 +91937,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -91952,10 +91952,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cn) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -91965,10 +91965,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -91978,10 +91978,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -91993,12 +91993,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -92009,11 +92009,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -92024,11 +92024,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -92038,11 +92038,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -92053,11 +92053,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92070,13 +92070,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -92086,11 +92086,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -92101,11 +92101,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92118,13 +92118,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(2)
@@ -92134,11 +92134,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(2)
@@ -92149,11 +92149,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92166,13 +92166,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92183,12 +92183,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92200,12 +92200,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92217,12 +92217,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -92235,13 +92235,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92252,12 +92252,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92269,12 +92269,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92286,12 +92286,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -92304,13 +92304,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92324,13 +92324,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, qmin) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -92340,10 +92340,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, qmax) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -92353,10 +92353,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_2X4C8__WASMSIMD_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cm) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -92366,13 +92366,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -92381,10 +92381,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cn) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -92394,10 +92394,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -92407,10 +92407,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -92422,12 +92422,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -92438,11 +92438,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -92453,11 +92453,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_lt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -92467,11 +92467,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -92482,11 +92482,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92499,13 +92499,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_gt_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -92515,11 +92515,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -92530,11 +92530,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92547,13 +92547,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_div_8) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(3)
@@ -92563,11 +92563,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_div_8_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(3)
@@ -92578,11 +92578,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_div_8_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92595,13 +92595,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_gt_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92612,12 +92612,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92629,12 +92629,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92646,12 +92646,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -92664,13 +92664,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_div_4) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92681,12 +92681,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92698,12 +92698,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_div_4_strided_a) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -92715,12 +92715,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_div_4_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -92733,13 +92733,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, strided_cm_subtile) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92753,13 +92753,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, qmin) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, qmin) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -92769,10 +92769,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, qmax) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, qmax) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -92782,10 +92782,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_MINMAX_3X4C8__WASMSIMD_LD128, strided_cm) {
+  TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cm) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -92795,13 +92795,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_eq_8) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(1)
@@ -92811,10 +92811,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, strided_cn) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(1)
@@ -92825,10 +92825,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(1)
@@ -92839,10 +92839,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -92855,12 +92855,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -92872,11 +92872,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -92888,11 +92888,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -92903,11 +92903,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -92919,11 +92919,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92937,13 +92937,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -92954,11 +92954,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -92970,11 +92970,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -92988,13 +92988,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93005,11 +93005,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93021,11 +93021,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93039,13 +93039,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93057,12 +93057,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93075,12 +93075,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93093,12 +93093,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -93112,13 +93112,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93130,12 +93130,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93148,12 +93148,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93166,12 +93166,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -93185,13 +93185,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93206,13 +93206,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_1X4C8__WASMSIMD, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, strided_cm) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(1)
@@ -93223,13 +93223,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_eq_8) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(2)
@@ -93239,10 +93239,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, strided_cn) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(2)
@@ -93253,10 +93253,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(2)
@@ -93267,10 +93267,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -93283,12 +93283,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93300,11 +93300,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93316,11 +93316,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93331,11 +93331,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93347,11 +93347,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93365,13 +93365,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93382,11 +93382,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93398,11 +93398,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93416,13 +93416,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93433,11 +93433,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93449,11 +93449,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93467,13 +93467,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93485,12 +93485,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93503,12 +93503,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93521,12 +93521,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -93540,13 +93540,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93558,12 +93558,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93576,12 +93576,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93594,12 +93594,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -93613,13 +93613,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93634,13 +93634,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_2X4C8__WASMSIMD, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, strided_cm) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(2)
@@ -93651,13 +93651,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_eq_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_eq_8) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(3)
@@ -93667,10 +93667,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, strided_cn) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(3)
@@ -93681,10 +93681,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_eq_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_eq_8_strided_a) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(3)
@@ -93695,10 +93695,10 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_eq_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -93711,12 +93711,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_eq_8_subtile_m) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93728,11 +93728,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_eq_8_subtile_n) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93744,11 +93744,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_lt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93759,11 +93759,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_lt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_lt_8_strided_a) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93775,11 +93775,11 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_lt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93793,13 +93793,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_gt_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93810,11 +93810,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_gt_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_gt_8_strided_a) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93826,11 +93826,11 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_gt_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93844,13 +93844,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_div_8) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93861,11 +93861,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_div_8_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_div_8_strided_a) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .extended_weights(true)
@@ -93877,11 +93877,11 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, k_div_8_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -93895,13 +93895,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, n_gt_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93913,12 +93913,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, n_gt_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93931,12 +93931,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, n_gt_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, n_gt_4_strided_a) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93949,12 +93949,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, n_gt_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -93968,13 +93968,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, n_div_4) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -93986,12 +93986,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, n_div_4_strided_cn) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -94004,12 +94004,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, n_div_4_strided_a) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, n_div_4_strided_a) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -94022,12 +94022,12 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, n_div_4_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -94041,13 +94041,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, strided_cm_subtile) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -94062,13 +94062,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_GEMM_XW_MINMAX_3X4C8__WASMSIMD, strided_cm) {
+  TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, strided_cm) {
     GemmMicrokernelTester()
       .extended_weights(true)
       .mr(3)
@@ -94079,12 +94079,12 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, k_eq_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -94093,10 +94093,10 @@
     .m(1)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -94106,10 +94106,10 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, k_eq_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_strided_a) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -94119,10 +94119,10 @@
     .n(2)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, k_eq_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 1; m++) {
     for (uint32_t n = 1; n <= 2; n++) {
       GemmMicrokernelTester()
@@ -94134,12 +94134,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 1; m++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -94150,11 +94150,11 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 2; n++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -94165,11 +94165,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, k_gt_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -94179,11 +94179,11 @@
       .m(1)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, k_gt_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_gt_1_strided_a) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -94194,11 +94194,11 @@
       .n(2)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, k_gt_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -94211,13 +94211,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, n_gt_2) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_gt_2) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94228,12 +94228,12 @@
         .m(1)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, n_gt_2_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_gt_2_strided_cn) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94245,12 +94245,12 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, n_gt_2_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_gt_2_strided_a) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94262,12 +94262,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, n_gt_2_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_gt_2_subtile) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -94280,13 +94280,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, n_div_2) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_div_2) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94297,12 +94297,12 @@
         .m(1)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, n_div_2_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_div_2_strided_cn) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94314,12 +94314,12 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, n_div_2_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_div_2_strided_a) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94331,12 +94331,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, n_div_2_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_div_2_subtile) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -94349,13 +94349,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, strided_cm_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -94369,13 +94369,13 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, qmin) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -94385,10 +94385,10 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, qmax) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -94398,10 +94398,10 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X2__SCALAR, strided_cm) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -94411,11 +94411,11 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, k_eq_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -94424,10 +94424,10 @@
     .m(2)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -94437,10 +94437,10 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, k_eq_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_strided_a) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -94450,10 +94450,10 @@
     .n(2)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, k_eq_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 2; m++) {
     for (uint32_t n = 1; n <= 2; n++) {
       GemmMicrokernelTester()
@@ -94465,12 +94465,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 2; m++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -94481,11 +94481,11 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 2; n++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -94496,11 +94496,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, k_gt_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -94510,11 +94510,11 @@
       .m(2)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, k_gt_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_gt_1_strided_a) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -94525,11 +94525,11 @@
       .n(2)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, k_gt_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -94542,13 +94542,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, n_gt_2) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_gt_2) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94559,12 +94559,12 @@
         .m(2)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, n_gt_2_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_gt_2_strided_cn) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94576,12 +94576,12 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, n_gt_2_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_gt_2_strided_a) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94593,12 +94593,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, n_gt_2_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_gt_2_subtile) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -94611,13 +94611,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, n_div_2) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_div_2) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94628,12 +94628,12 @@
         .m(2)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, n_div_2_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_div_2_strided_cn) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94645,12 +94645,12 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, n_div_2_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_div_2_strided_a) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94662,12 +94662,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, n_div_2_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_div_2_subtile) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -94680,13 +94680,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, strided_cm_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -94700,13 +94700,13 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, qmin) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -94716,10 +94716,10 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, qmax) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -94729,10 +94729,10 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X2__SCALAR, strided_cm) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -94742,11 +94742,11 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, k_eq_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -94755,10 +94755,10 @@
     .m(3)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -94768,10 +94768,10 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, k_eq_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_strided_a) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -94781,10 +94781,10 @@
     .n(2)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, k_eq_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 3; m++) {
     for (uint32_t n = 1; n <= 2; n++) {
       GemmMicrokernelTester()
@@ -94796,12 +94796,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 3; m++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -94812,11 +94812,11 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 2; n++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -94827,11 +94827,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, k_gt_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -94841,11 +94841,11 @@
       .m(3)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, k_gt_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_gt_1_strided_a) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -94856,11 +94856,11 @@
       .n(2)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, k_gt_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -94873,13 +94873,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, n_gt_2) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_gt_2) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94890,12 +94890,12 @@
         .m(3)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, n_gt_2_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_gt_2_strided_cn) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94907,12 +94907,12 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, n_gt_2_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_gt_2_strided_a) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94924,12 +94924,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, n_gt_2_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_gt_2_subtile) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -94942,13 +94942,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, n_div_2) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_div_2) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94959,12 +94959,12 @@
         .m(3)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, n_div_2_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_div_2_strided_cn) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94976,12 +94976,12 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, n_div_2_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_div_2_strided_a) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -94993,12 +94993,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, n_div_2_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_div_2_subtile) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -95011,13 +95011,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, strided_cm_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -95031,13 +95031,13 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, qmin) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -95047,10 +95047,10 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, qmax) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -95060,10 +95060,10 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X2__SCALAR, strided_cm) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -95073,11 +95073,11 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, k_eq_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -95086,10 +95086,10 @@
     .m(4)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -95099,10 +95099,10 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, k_eq_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_strided_a) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -95112,10 +95112,10 @@
     .n(2)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 4; m++) {
     for (uint32_t n = 1; n <= 2; n++) {
       GemmMicrokernelTester()
@@ -95127,12 +95127,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 4; m++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -95143,11 +95143,11 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 2; n++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -95158,11 +95158,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, k_gt_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -95172,11 +95172,11 @@
       .m(4)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, k_gt_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_gt_1_strided_a) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -95187,11 +95187,11 @@
       .n(2)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -95204,13 +95204,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, n_gt_2) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_gt_2) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95221,12 +95221,12 @@
         .m(4)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_gt_2_strided_cn) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95238,12 +95238,12 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, n_gt_2_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_gt_2_strided_a) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95255,12 +95255,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_gt_2_subtile) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -95273,13 +95273,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, n_div_2) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_div_2) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95290,12 +95290,12 @@
         .m(4)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_div_2_strided_cn) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95307,12 +95307,12 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, n_div_2_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_div_2_strided_a) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95324,12 +95324,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_div_2_subtile) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -95342,13 +95342,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -95362,13 +95362,13 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, qmin) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -95378,10 +95378,10 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, qmax) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -95391,10 +95391,10 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X2__SCALAR, strided_cm) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -95404,11 +95404,11 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, k_eq_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -95417,10 +95417,10 @@
     .m(1)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -95430,10 +95430,10 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, k_eq_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_strided_a) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -95443,10 +95443,10 @@
     .n(4)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 1; m++) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -95458,12 +95458,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 1; m++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -95474,11 +95474,11 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 4; n++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -95489,11 +95489,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, k_gt_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -95503,11 +95503,11 @@
       .m(1)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, k_gt_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_gt_1_strided_a) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -95518,11 +95518,11 @@
       .n(4)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, k_gt_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -95535,13 +95535,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, n_gt_4) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_gt_4) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95552,12 +95552,12 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, n_gt_4_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_gt_4_strided_cn) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95569,12 +95569,12 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, n_gt_4_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_gt_4_strided_a) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95586,12 +95586,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, n_gt_4_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_gt_4_subtile) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -95604,13 +95604,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, n_div_4) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_div_4) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95621,12 +95621,12 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, n_div_4_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_div_4_strided_cn) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95638,12 +95638,12 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, n_div_4_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_div_4_strided_a) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95655,12 +95655,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, n_div_4_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_div_4_subtile) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -95673,13 +95673,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, strided_cm_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -95693,13 +95693,13 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, qmin) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -95709,10 +95709,10 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, qmax) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -95722,10 +95722,10 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_1X4__SCALAR, strided_cm) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -95735,11 +95735,11 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, k_eq_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -95748,10 +95748,10 @@
     .m(2)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -95761,10 +95761,10 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, k_eq_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_strided_a) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -95774,10 +95774,10 @@
     .n(4)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 2; m++) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -95789,12 +95789,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 2; m++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -95805,11 +95805,11 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 4; n++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -95820,11 +95820,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, k_gt_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -95834,11 +95834,11 @@
       .m(2)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, k_gt_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_gt_1_strided_a) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -95849,11 +95849,11 @@
       .n(4)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -95866,13 +95866,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, n_gt_4) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_gt_4) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95883,12 +95883,12 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_gt_4_strided_cn) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95900,12 +95900,12 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, n_gt_4_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_gt_4_strided_a) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95917,12 +95917,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_gt_4_subtile) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -95935,13 +95935,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, n_div_4) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_div_4) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95952,12 +95952,12 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_div_4_strided_cn) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95969,12 +95969,12 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, n_div_4_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_div_4_strided_a) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -95986,12 +95986,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_div_4_subtile) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -96004,13 +96004,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -96024,13 +96024,13 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, qmin) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -96040,10 +96040,10 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, qmax) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -96053,10 +96053,10 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_2X4__SCALAR, strided_cm) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -96066,11 +96066,11 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, k_eq_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -96079,10 +96079,10 @@
     .m(3)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -96092,10 +96092,10 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, k_eq_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_strided_a) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -96105,10 +96105,10 @@
     .n(4)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, k_eq_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 3; m++) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -96120,12 +96120,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 3; m++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -96136,11 +96136,11 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 4; n++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -96151,11 +96151,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, k_gt_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -96165,11 +96165,11 @@
       .m(3)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, k_gt_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_gt_1_strided_a) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -96180,11 +96180,11 @@
       .n(4)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, k_gt_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -96197,13 +96197,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, n_gt_4) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_gt_4) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96214,12 +96214,12 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, n_gt_4_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_gt_4_strided_cn) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96231,12 +96231,12 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, n_gt_4_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_gt_4_strided_a) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96248,12 +96248,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, n_gt_4_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_gt_4_subtile) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -96266,13 +96266,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, n_div_4) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_div_4) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96283,12 +96283,12 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, n_div_4_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_div_4_strided_cn) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96300,12 +96300,12 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, n_div_4_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_div_4_strided_a) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96317,12 +96317,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, n_div_4_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_div_4_subtile) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -96335,13 +96335,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, strided_cm_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -96355,13 +96355,13 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, qmin) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -96371,10 +96371,10 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, qmax) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -96384,10 +96384,10 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_3X4__SCALAR, strided_cm) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -96397,11 +96397,11 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, k_eq_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -96410,10 +96410,10 @@
     .m(4)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -96423,10 +96423,10 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, k_eq_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_strided_a) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -96436,10 +96436,10 @@
     .n(4)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 4; m++) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -96451,12 +96451,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 4; m++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -96467,11 +96467,11 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 4; n++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -96482,11 +96482,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, k_gt_1) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -96496,11 +96496,11 @@
       .m(4)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, k_gt_1_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_gt_1_strided_a) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -96511,11 +96511,11 @@
       .n(4)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -96528,13 +96528,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, n_gt_4) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_gt_4) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96545,12 +96545,12 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_gt_4_strided_cn) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96562,12 +96562,12 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, n_gt_4_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_gt_4_strided_a) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96579,12 +96579,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_gt_4_subtile) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -96597,13 +96597,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, n_div_4) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_div_4) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96614,12 +96614,12 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_div_4_strided_cn) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96631,12 +96631,12 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, n_div_4_strided_a) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_div_4_strided_a) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -96648,12 +96648,12 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_div_4_subtile) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -96666,13 +96666,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -96686,13 +96686,13 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, qmin) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -96702,10 +96702,10 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, qmax) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -96715,10 +96715,10 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_GEMM_MINMAX_4X4__SCALAR, strided_cm) {
+TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -96728,5 +96728,5 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
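
Note on the new `_gemmlowp_` infix in the kernel and test names above: it identifies the requantization scheme these microkernels implement, i.e. gemmlowp-style fixed-point requantization (rounding-doubling high multiply by a Q31 multiplier, a rounding right shift, then zero-point offset and clamping to [qmin, qmax]). The sketch below only illustrates that scheme under simplifying assumptions (post-shift >= 1, no saturation special case); the helper name and parameter list are hypothetical and this is not the XNNPACK implementation.

  // Illustrative sketch (not XNNPACK code) of gemmlowp-style requantization of a
  // 32-bit accumulator. Assumes a Q31 multiplier and a post-shift of at least 1.
  #include <algorithm>
  #include <cstdint>

  inline int8_t requantize_gemmlowp_sketch(
      int32_t acc, int32_t multiplier, uint32_t shift,
      int32_t output_zero_point, int8_t qmin, int8_t qmax) {
    // Rounding-doubling high multiply: high 32 bits of 2*acc*multiplier, rounded.
    const int64_t product = int64_t(acc) * int64_t(multiplier);
    const int32_t q31_product = int32_t((product + (INT64_C(1) << 30)) >> 31);
    // Rounding right shift by the post-shift amount.
    const int32_t rounded = (q31_product + (INT32_C(1) << (shift - 1))) >> int(shift);
    // Offset by the output zero point, then clamp to the [qmin, qmax] range.
    const int32_t biased = rounded + output_zero_point;
    return int8_t(std::min<int32_t>(std::max<int32_t>(biased, qmin), qmax));
  }
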
diff --git a/test/qs8-gemm-minmax-gemmlowp.yaml b/test/qs8-gemm-minmax-gemmlowp.yaml
new file mode 100644
index 0000000..69f9a0d
--- /dev/null
+++ b/test/qs8-gemm-minmax-gemmlowp.yaml
@@ -0,0 +1,650 @@
+# Copyright 2020 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 4
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 4
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
diff --git a/test/qs8-gemm-minmax.yaml b/test/qs8-gemm-minmax.yaml
deleted file mode 100644
index 53fd93d..0000000
--- a/test/qs8-gemm-minmax.yaml
+++ /dev/null
@@ -1,650 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-- name: xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 4
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 4
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_gemm_minmax_ukernel_1x2__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_ukernel_2x2__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_ukernel_3x2__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_ukernel_4x2__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_ukernel_1x4__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_ukernel_2x4__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_ukernel_3x4__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_gemm_minmax_ukernel_4x4__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
diff --git a/test/qs8-igemm-minmax.cc b/test/qs8-igemm-minmax-gemmlowp.cc
similarity index 61%
rename from test/qs8-igemm-minmax.cc
rename to test/qs8-igemm-minmax-gemmlowp.cc
index fa33931..e33912f 100644
--- a/test/qs8-igemm-minmax.cc
+++ b/test/qs8-igemm-minmax-gemmlowp.cc
@@ -7,7 +7,7 @@
 // LICENSE file in the root directory of this source tree.
 //
 // Auto-generated file. Do not edit!
-//   Specification: test/qs8-igemm-minmax.yaml
+//   Specification: test/qs8-igemm-minmax-gemmlowp.yaml
 //   Generator: tools/generate-gemm-test.py
 
 
@@ -23,7 +23,7 @@
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -33,10 +33,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -47,10 +47,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -63,12 +63,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -80,11 +80,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -96,11 +96,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -111,11 +111,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -129,13 +129,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -146,11 +146,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -164,13 +164,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -181,11 +181,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -199,13 +199,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -217,12 +217,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -235,12 +235,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -254,13 +254,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -272,12 +272,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -290,12 +290,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -309,13 +309,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -327,11 +327,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -346,13 +346,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -365,12 +365,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -383,12 +383,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -403,13 +403,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -422,11 +422,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -441,12 +441,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -457,10 +457,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -471,10 +471,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -485,13 +485,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -501,10 +501,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -515,10 +515,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -531,12 +531,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -548,11 +548,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -564,11 +564,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -579,11 +579,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -597,13 +597,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -614,11 +614,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -632,13 +632,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -649,11 +649,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -667,13 +667,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -685,12 +685,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -703,12 +703,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -722,13 +722,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -740,12 +740,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -758,12 +758,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -777,13 +777,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -795,11 +795,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -814,13 +814,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -833,12 +833,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -851,12 +851,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -871,13 +871,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -890,11 +890,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -909,12 +909,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -925,10 +925,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -939,10 +939,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -953,13 +953,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -969,10 +969,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -983,10 +983,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -999,12 +999,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -1016,11 +1016,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -1032,11 +1032,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1047,11 +1047,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1065,13 +1065,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -1082,11 +1082,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1100,13 +1100,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -1117,11 +1117,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1135,13 +1135,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1153,12 +1153,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1171,12 +1171,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1190,13 +1190,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1208,12 +1208,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1226,12 +1226,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1245,13 +1245,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -1263,11 +1263,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1282,13 +1282,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1301,12 +1301,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1319,12 +1319,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1339,13 +1339,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -1358,11 +1358,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1377,12 +1377,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1393,10 +1393,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1407,10 +1407,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1421,13 +1421,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1437,10 +1437,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1451,10 +1451,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -1467,12 +1467,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -1484,11 +1484,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -1500,11 +1500,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1515,11 +1515,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1533,13 +1533,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -1550,11 +1550,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1568,13 +1568,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -1585,11 +1585,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1603,13 +1603,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1621,12 +1621,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1639,12 +1639,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1658,13 +1658,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1676,12 +1676,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1694,12 +1694,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1713,13 +1713,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -1731,11 +1731,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1750,13 +1750,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1769,12 +1769,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1787,12 +1787,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -1807,13 +1807,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -1826,11 +1826,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -1845,12 +1845,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1861,10 +1861,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1875,10 +1875,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1889,13 +1889,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1905,10 +1905,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -1919,10 +1919,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -1935,12 +1935,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -1952,11 +1952,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -1968,11 +1968,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -1983,11 +1983,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2001,13 +2001,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2018,11 +2018,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2036,13 +2036,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2053,11 +2053,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2071,13 +2071,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2089,12 +2089,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2107,12 +2107,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2126,13 +2126,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2144,12 +2144,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2162,12 +2162,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2181,13 +2181,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -2199,11 +2199,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2218,13 +2218,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2237,12 +2237,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2255,12 +2255,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2275,13 +2275,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -2294,11 +2294,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2313,12 +2313,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2329,10 +2329,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2343,10 +2343,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2357,13 +2357,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2373,10 +2373,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2387,10 +2387,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -2403,12 +2403,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -2420,11 +2420,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -2436,11 +2436,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -2451,11 +2451,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2469,13 +2469,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2486,11 +2486,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2504,13 +2504,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2521,11 +2521,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2539,13 +2539,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2557,12 +2557,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2575,12 +2575,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2594,13 +2594,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2612,12 +2612,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2630,12 +2630,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2649,13 +2649,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -2667,11 +2667,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2686,13 +2686,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2705,12 +2705,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2723,12 +2723,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -2743,13 +2743,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -2762,11 +2762,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -2781,12 +2781,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2797,10 +2797,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2811,10 +2811,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -2825,13 +2825,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -2841,10 +2841,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -2855,10 +2855,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -2871,12 +2871,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -2888,11 +2888,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -2904,11 +2904,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -2919,11 +2919,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -2937,13 +2937,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -2954,11 +2954,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -2972,13 +2972,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -2989,11 +2989,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3007,13 +3007,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3025,12 +3025,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3043,12 +3043,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3062,13 +3062,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3080,12 +3080,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3098,12 +3098,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3117,13 +3117,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -3135,11 +3135,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3154,13 +3154,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3173,12 +3173,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3191,12 +3191,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3211,13 +3211,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -3230,11 +3230,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3249,12 +3249,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3265,10 +3265,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3279,10 +3279,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3293,13 +3293,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3309,10 +3309,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3323,10 +3323,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -3339,12 +3339,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -3356,11 +3356,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -3372,11 +3372,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -3387,11 +3387,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3405,13 +3405,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -3422,11 +3422,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3440,13 +3440,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -3457,11 +3457,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3475,13 +3475,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3493,12 +3493,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3511,12 +3511,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3530,13 +3530,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3548,12 +3548,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3566,12 +3566,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3585,13 +3585,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -3603,11 +3603,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3622,13 +3622,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3641,12 +3641,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3659,12 +3659,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3679,13 +3679,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -3698,11 +3698,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3717,12 +3717,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3733,10 +3733,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3747,10 +3747,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3761,13 +3761,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3777,10 +3777,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -3791,10 +3791,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -3807,12 +3807,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -3824,11 +3824,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -3840,11 +3840,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -3855,11 +3855,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3873,13 +3873,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -3890,11 +3890,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3908,13 +3908,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -3925,11 +3925,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -3943,13 +3943,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3961,12 +3961,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3979,12 +3979,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -3998,13 +3998,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4016,12 +4016,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4034,12 +4034,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4053,13 +4053,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -4071,11 +4071,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4090,13 +4090,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4109,12 +4109,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4127,12 +4127,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4147,13 +4147,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -4166,11 +4166,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4185,12 +4185,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4201,10 +4201,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4215,10 +4215,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4229,13 +4229,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4245,10 +4245,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4259,10 +4259,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -4275,12 +4275,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -4292,11 +4292,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -4308,11 +4308,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -4323,11 +4323,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4341,13 +4341,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -4358,11 +4358,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4376,13 +4376,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -4393,11 +4393,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4411,13 +4411,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4429,12 +4429,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4447,12 +4447,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4466,13 +4466,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4484,12 +4484,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4502,12 +4502,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4521,13 +4521,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -4539,11 +4539,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4558,13 +4558,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4577,12 +4577,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4595,12 +4595,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4615,13 +4615,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -4634,11 +4634,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4653,12 +4653,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4669,10 +4669,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4683,10 +4683,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4697,13 +4697,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4713,10 +4713,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -4727,10 +4727,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -4743,12 +4743,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -4760,11 +4760,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -4776,11 +4776,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -4791,11 +4791,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4809,13 +4809,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -4826,11 +4826,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4844,13 +4844,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -4861,11 +4861,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -4879,13 +4879,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4897,12 +4897,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4915,12 +4915,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4934,13 +4934,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4952,12 +4952,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4970,12 +4970,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -4989,13 +4989,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -5007,11 +5007,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5026,13 +5026,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -5045,12 +5045,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -5063,12 +5063,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5083,13 +5083,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -5102,11 +5102,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -5121,12 +5121,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5137,10 +5137,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5151,10 +5151,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5165,13 +5165,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5181,10 +5181,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5195,10 +5195,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -5211,12 +5211,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -5228,11 +5228,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -5244,11 +5244,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -5259,11 +5259,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5277,13 +5277,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -5294,11 +5294,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5312,13 +5312,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -5329,11 +5329,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5347,13 +5347,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5365,12 +5365,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5383,12 +5383,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5402,13 +5402,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5420,12 +5420,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5438,12 +5438,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5457,13 +5457,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -5475,11 +5475,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5494,13 +5494,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5513,12 +5513,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5531,12 +5531,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -5551,13 +5551,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -5570,11 +5570,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5589,12 +5589,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5605,10 +5605,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5619,10 +5619,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -5633,13 +5633,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5649,10 +5649,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -5663,10 +5663,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -5679,12 +5679,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -5696,11 +5696,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -5712,11 +5712,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -5727,11 +5727,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5745,13 +5745,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -5762,11 +5762,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5780,13 +5780,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -5797,11 +5797,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5815,13 +5815,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5833,12 +5833,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5851,12 +5851,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5870,13 +5870,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5888,12 +5888,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5906,12 +5906,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5925,13 +5925,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -5943,11 +5943,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -5962,13 +5962,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5981,12 +5981,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -5999,12 +5999,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -6019,13 +6019,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -6038,11 +6038,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6057,12 +6057,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -6073,10 +6073,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -6087,10 +6087,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -6101,13 +6101,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -6117,10 +6117,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -6131,10 +6131,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -6147,12 +6147,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -6164,11 +6164,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -6180,11 +6180,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -6195,11 +6195,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6213,13 +6213,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -6230,11 +6230,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6248,13 +6248,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -6265,11 +6265,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6283,13 +6283,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6301,12 +6301,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6319,12 +6319,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6338,13 +6338,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6356,12 +6356,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6374,12 +6374,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6393,13 +6393,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -6411,11 +6411,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6430,13 +6430,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6449,12 +6449,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6467,12 +6467,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -6487,13 +6487,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -6506,11 +6506,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6525,12 +6525,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -6541,10 +6541,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -6555,10 +6555,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -6569,13 +6569,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -6585,10 +6585,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -6599,10 +6599,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -6615,12 +6615,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -6632,11 +6632,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -6648,11 +6648,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -6663,11 +6663,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6681,13 +6681,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -6698,11 +6698,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6716,13 +6716,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -6733,11 +6733,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6751,13 +6751,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6769,12 +6769,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6787,12 +6787,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6806,13 +6806,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6824,12 +6824,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6842,12 +6842,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6861,13 +6861,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -6879,11 +6879,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6898,13 +6898,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6917,12 +6917,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6935,12 +6935,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -6955,13 +6955,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -6974,11 +6974,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -6993,12 +6993,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -7009,10 +7009,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -7023,10 +7023,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -7037,13 +7037,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -7053,10 +7053,10 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -7067,10 +7067,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -7083,12 +7083,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -7100,11 +7100,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -7116,11 +7116,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -7131,11 +7131,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -7149,13 +7149,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -7166,11 +7166,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -7184,13 +7184,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -7201,11 +7201,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -7219,13 +7219,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7237,12 +7237,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7255,12 +7255,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7274,13 +7274,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7292,12 +7292,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7310,12 +7310,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7329,13 +7329,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -7347,11 +7347,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -7366,13 +7366,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7385,12 +7385,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7403,12 +7403,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -7423,13 +7423,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -7442,11 +7442,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 6; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7461,12 +7461,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -7477,10 +7477,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -7491,10 +7491,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -7505,13 +7505,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7521,10 +7521,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7535,10 +7535,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -7551,12 +7551,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -7568,11 +7568,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -7584,11 +7584,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -7599,11 +7599,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7617,13 +7617,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -7634,11 +7634,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7652,13 +7652,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -7669,11 +7669,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7687,13 +7687,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7705,12 +7705,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7723,12 +7723,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7742,13 +7742,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7760,12 +7760,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7778,12 +7778,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7797,13 +7797,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -7815,11 +7815,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7834,13 +7834,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7853,12 +7853,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7871,12 +7871,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -7891,13 +7891,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -7910,11 +7910,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -7929,12 +7929,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7945,10 +7945,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7959,10 +7959,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -7973,13 +7973,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -7989,10 +7989,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -8003,10 +8003,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -8019,12 +8019,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -8036,11 +8036,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -8052,11 +8052,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -8067,11 +8067,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -8085,13 +8085,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -8102,11 +8102,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -8120,13 +8120,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -8137,11 +8137,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -8155,13 +8155,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8173,12 +8173,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8191,12 +8191,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8210,13 +8210,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8228,12 +8228,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8246,12 +8246,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8265,13 +8265,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -8283,11 +8283,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -8302,13 +8302,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8321,12 +8321,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8339,12 +8339,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -8359,13 +8359,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -8378,11 +8378,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8397,12 +8397,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -8413,10 +8413,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -8427,10 +8427,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -8441,13 +8441,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8457,10 +8457,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8471,10 +8471,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -8487,12 +8487,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -8504,11 +8504,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -8520,11 +8520,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -8535,11 +8535,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8553,13 +8553,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -8570,11 +8570,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8588,13 +8588,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -8605,11 +8605,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8623,13 +8623,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8641,12 +8641,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8659,12 +8659,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8678,13 +8678,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8696,12 +8696,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8714,12 +8714,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8733,13 +8733,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -8751,11 +8751,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8770,13 +8770,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8789,12 +8789,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8807,12 +8807,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -8827,13 +8827,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -8846,11 +8846,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -8865,12 +8865,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8881,10 +8881,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8895,10 +8895,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -8909,13 +8909,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -8925,10 +8925,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -8939,10 +8939,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -8955,12 +8955,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -8972,11 +8972,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -8988,11 +8988,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -9003,11 +9003,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -9021,13 +9021,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -9038,11 +9038,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -9056,13 +9056,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -9073,11 +9073,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -9091,13 +9091,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9109,12 +9109,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9127,12 +9127,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9146,13 +9146,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9164,12 +9164,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9182,12 +9182,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9201,13 +9201,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -9219,11 +9219,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -9238,13 +9238,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9257,12 +9257,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9275,12 +9275,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -9295,13 +9295,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -9314,11 +9314,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9333,12 +9333,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -9349,10 +9349,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -9363,10 +9363,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -9377,13 +9377,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9393,10 +9393,10 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9407,10 +9407,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -9423,12 +9423,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -9440,11 +9440,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -9456,11 +9456,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -9471,11 +9471,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9489,13 +9489,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -9506,11 +9506,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9524,13 +9524,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -9541,11 +9541,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9559,13 +9559,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9577,12 +9577,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9595,12 +9595,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9614,13 +9614,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9632,12 +9632,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9650,12 +9650,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9669,13 +9669,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -9687,11 +9687,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9706,13 +9706,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9725,12 +9725,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9743,12 +9743,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -9763,13 +9763,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -9782,11 +9782,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 6; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -9801,12 +9801,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9817,10 +9817,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9831,10 +9831,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -9845,13 +9845,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -9861,10 +9861,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -9875,10 +9875,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -9891,12 +9891,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -9908,11 +9908,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -9924,11 +9924,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -9939,11 +9939,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -9957,13 +9957,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -9974,11 +9974,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -9992,13 +9992,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -10009,11 +10009,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -10027,13 +10027,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10045,12 +10045,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10063,12 +10063,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10082,13 +10082,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10100,12 +10100,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10118,12 +10118,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10137,13 +10137,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -10155,11 +10155,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -10174,13 +10174,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10193,12 +10193,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10211,12 +10211,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -10231,13 +10231,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -10250,11 +10250,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10269,12 +10269,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -10285,10 +10285,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -10299,10 +10299,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -10313,13 +10313,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10329,10 +10329,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10343,10 +10343,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -10359,12 +10359,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -10376,11 +10376,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -10392,11 +10392,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -10407,11 +10407,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10425,13 +10425,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -10442,11 +10442,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10460,13 +10460,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -10477,11 +10477,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10495,13 +10495,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10513,12 +10513,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10531,12 +10531,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10550,13 +10550,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10568,12 +10568,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10586,12 +10586,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10605,13 +10605,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -10623,11 +10623,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10642,13 +10642,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10661,12 +10661,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10679,12 +10679,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -10699,13 +10699,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -10718,11 +10718,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10737,12 +10737,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10753,10 +10753,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10767,10 +10767,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -10781,13 +10781,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -10797,10 +10797,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -10811,10 +10811,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -10827,12 +10827,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -10844,11 +10844,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -10860,11 +10860,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -10875,11 +10875,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -10893,13 +10893,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -10910,11 +10910,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -10928,13 +10928,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -10945,11 +10945,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -10963,13 +10963,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10981,12 +10981,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -10999,12 +10999,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11018,13 +11018,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11036,12 +11036,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11054,12 +11054,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11073,13 +11073,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -11091,11 +11091,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -11110,13 +11110,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11129,12 +11129,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11147,12 +11147,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -11167,13 +11167,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -11186,11 +11186,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11205,12 +11205,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -11221,10 +11221,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -11235,10 +11235,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -11249,13 +11249,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11265,10 +11265,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11279,10 +11279,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -11295,12 +11295,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -11312,11 +11312,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -11328,11 +11328,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -11343,11 +11343,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11361,13 +11361,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -11378,11 +11378,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11396,13 +11396,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -11413,11 +11413,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11431,13 +11431,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11449,12 +11449,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11467,12 +11467,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11486,13 +11486,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11504,12 +11504,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11522,12 +11522,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11541,13 +11541,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -11559,11 +11559,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11578,13 +11578,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11597,12 +11597,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11615,12 +11615,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -11635,13 +11635,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -11654,11 +11654,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11673,12 +11673,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11689,10 +11689,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11703,10 +11703,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -11717,13 +11717,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -11733,10 +11733,10 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -11747,10 +11747,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -11763,12 +11763,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -11780,11 +11780,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -11796,11 +11796,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -11811,11 +11811,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -11829,13 +11829,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -11846,11 +11846,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -11864,13 +11864,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -11881,11 +11881,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -11899,13 +11899,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11917,12 +11917,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11935,12 +11935,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11954,13 +11954,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11972,12 +11972,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -11990,12 +11990,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12009,13 +12009,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -12027,11 +12027,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -12046,13 +12046,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12065,12 +12065,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12083,12 +12083,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -12103,13 +12103,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -12122,11 +12122,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 6; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12141,12 +12141,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -12157,10 +12157,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -12171,10 +12171,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -12185,13 +12185,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -12201,10 +12201,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -12215,10 +12215,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -12231,12 +12231,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -12248,11 +12248,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -12264,11 +12264,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -12279,11 +12279,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12297,13 +12297,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -12314,11 +12314,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12332,13 +12332,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -12349,11 +12349,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12367,13 +12367,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12385,12 +12385,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12403,12 +12403,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12422,13 +12422,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12440,12 +12440,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12458,12 +12458,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12477,13 +12477,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -12495,11 +12495,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12514,13 +12514,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12533,12 +12533,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12551,12 +12551,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -12571,13 +12571,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -12590,11 +12590,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12609,12 +12609,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -12625,10 +12625,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -12639,10 +12639,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -12653,13 +12653,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -12669,10 +12669,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -12683,10 +12683,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -12699,12 +12699,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -12716,11 +12716,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -12732,11 +12732,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -12747,11 +12747,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -12765,13 +12765,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -12782,11 +12782,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -12800,13 +12800,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -12817,11 +12817,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -12835,13 +12835,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12853,12 +12853,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12871,12 +12871,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12890,13 +12890,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12908,12 +12908,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12926,12 +12926,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -12945,13 +12945,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -12963,11 +12963,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -12982,13 +12982,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13001,12 +13001,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13019,12 +13019,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -13039,13 +13039,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -13058,11 +13058,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13077,12 +13077,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -13093,10 +13093,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -13107,10 +13107,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -13121,13 +13121,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -13137,10 +13137,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -13151,10 +13151,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -13167,12 +13167,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -13184,11 +13184,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -13200,11 +13200,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -13215,11 +13215,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -13233,13 +13233,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -13250,11 +13250,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -13268,13 +13268,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -13285,11 +13285,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -13303,13 +13303,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13321,12 +13321,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13339,12 +13339,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13358,13 +13358,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13376,12 +13376,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13394,12 +13394,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13413,13 +13413,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -13431,11 +13431,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -13450,13 +13450,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13469,12 +13469,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13487,12 +13487,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -13507,13 +13507,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -13526,11 +13526,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13545,12 +13545,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -13561,10 +13561,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -13575,10 +13575,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -13589,13 +13589,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -13605,10 +13605,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -13619,10 +13619,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -13635,12 +13635,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -13652,11 +13652,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -13668,11 +13668,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -13683,11 +13683,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13701,13 +13701,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -13718,11 +13718,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13736,13 +13736,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -13753,11 +13753,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13771,13 +13771,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13789,12 +13789,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13807,12 +13807,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13826,13 +13826,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13844,12 +13844,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13862,12 +13862,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13881,13 +13881,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -13899,11 +13899,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13918,13 +13918,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13937,12 +13937,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -13955,12 +13955,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -13975,13 +13975,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -13994,11 +13994,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14013,12 +14013,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -14029,10 +14029,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -14043,10 +14043,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -14057,13 +14057,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -14073,10 +14073,10 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -14087,10 +14087,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -14103,12 +14103,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -14120,11 +14120,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -14136,11 +14136,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -14151,11 +14151,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -14169,13 +14169,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -14186,11 +14186,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -14204,13 +14204,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -14221,11 +14221,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -14239,13 +14239,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14257,12 +14257,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14275,12 +14275,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14294,13 +14294,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14312,12 +14312,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14330,12 +14330,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14349,13 +14349,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -14367,11 +14367,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -14386,13 +14386,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14405,12 +14405,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14423,12 +14423,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -14443,13 +14443,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -14462,11 +14462,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 6; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14481,12 +14481,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -14497,10 +14497,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -14511,10 +14511,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(6)
@@ -14525,13 +14525,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14541,10 +14541,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14555,10 +14555,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -14571,12 +14571,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -14588,11 +14588,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -14604,11 +14604,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -14619,11 +14619,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14637,13 +14637,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -14654,11 +14654,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14672,13 +14672,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -14689,11 +14689,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14707,13 +14707,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14725,12 +14725,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14743,12 +14743,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14762,13 +14762,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14780,12 +14780,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14798,12 +14798,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14817,13 +14817,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -14835,11 +14835,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14854,13 +14854,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14873,12 +14873,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14891,12 +14891,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -14911,13 +14911,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -14930,11 +14930,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -14949,12 +14949,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14965,10 +14965,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14979,10 +14979,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -14993,13 +14993,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -15009,10 +15009,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -15023,10 +15023,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -15039,12 +15039,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -15056,11 +15056,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -15072,11 +15072,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -15087,11 +15087,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -15105,13 +15105,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -15122,11 +15122,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -15140,13 +15140,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -15157,11 +15157,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -15175,13 +15175,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15193,12 +15193,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15211,12 +15211,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15230,13 +15230,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15248,12 +15248,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15266,12 +15266,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15285,13 +15285,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -15303,11 +15303,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -15322,13 +15322,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15341,12 +15341,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15359,12 +15359,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -15379,13 +15379,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -15398,11 +15398,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15417,12 +15417,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -15433,10 +15433,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -15447,10 +15447,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -15461,13 +15461,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15477,10 +15477,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15491,10 +15491,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -15507,12 +15507,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -15524,11 +15524,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -15540,11 +15540,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -15555,11 +15555,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15573,13 +15573,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -15590,11 +15590,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15608,13 +15608,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -15625,11 +15625,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15643,13 +15643,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15661,12 +15661,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15679,12 +15679,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15698,13 +15698,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15716,12 +15716,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15734,12 +15734,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15753,13 +15753,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -15771,11 +15771,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15790,13 +15790,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15809,12 +15809,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15827,12 +15827,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -15847,13 +15847,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -15866,11 +15866,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -15885,12 +15885,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15901,10 +15901,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15915,10 +15915,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -15929,13 +15929,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -15945,10 +15945,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -15959,10 +15959,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -15975,12 +15975,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -15992,11 +15992,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -16008,11 +16008,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -16023,11 +16023,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -16041,13 +16041,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -16058,11 +16058,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -16076,13 +16076,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -16093,11 +16093,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -16111,13 +16111,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16129,12 +16129,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16147,12 +16147,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16166,13 +16166,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16184,12 +16184,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16202,12 +16202,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16221,13 +16221,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -16239,11 +16239,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -16258,13 +16258,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16277,12 +16277,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16295,12 +16295,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -16315,13 +16315,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -16334,11 +16334,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16353,12 +16353,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -16369,10 +16369,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -16383,10 +16383,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -16397,13 +16397,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16413,10 +16413,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16427,10 +16427,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -16443,12 +16443,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -16460,11 +16460,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -16476,11 +16476,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -16491,11 +16491,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16509,13 +16509,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -16526,11 +16526,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16544,13 +16544,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -16561,11 +16561,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16579,13 +16579,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16597,12 +16597,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16615,12 +16615,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16634,13 +16634,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16652,12 +16652,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16670,12 +16670,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16689,13 +16689,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -16707,11 +16707,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16726,13 +16726,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16745,12 +16745,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16763,12 +16763,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -16783,13 +16783,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -16802,11 +16802,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -16821,12 +16821,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16837,10 +16837,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16851,10 +16851,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -16865,13 +16865,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -16881,10 +16881,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -16895,10 +16895,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -16911,12 +16911,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -16928,11 +16928,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -16944,11 +16944,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -16959,11 +16959,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -16977,13 +16977,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -16994,11 +16994,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -17012,13 +17012,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -17029,11 +17029,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -17047,13 +17047,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17065,12 +17065,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17083,12 +17083,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17102,13 +17102,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17120,12 +17120,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17138,12 +17138,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17157,13 +17157,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -17175,11 +17175,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -17194,13 +17194,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17213,12 +17213,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17231,12 +17231,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -17251,13 +17251,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -17270,11 +17270,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17289,12 +17289,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -17305,10 +17305,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -17319,10 +17319,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -17333,13 +17333,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -17349,10 +17349,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -17363,10 +17363,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -17379,12 +17379,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -17396,11 +17396,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -17412,11 +17412,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -17427,11 +17427,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17445,13 +17445,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -17462,11 +17462,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17480,13 +17480,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -17497,11 +17497,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17515,13 +17515,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17533,12 +17533,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17551,12 +17551,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17570,13 +17570,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17588,12 +17588,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17606,12 +17606,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17625,13 +17625,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -17643,11 +17643,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17662,13 +17662,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17681,12 +17681,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17699,12 +17699,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -17719,13 +17719,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -17738,11 +17738,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -17757,12 +17757,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -17773,10 +17773,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -17787,10 +17787,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -17801,13 +17801,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -17817,10 +17817,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -17831,10 +17831,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -17847,12 +17847,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -17864,11 +17864,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -17880,11 +17880,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -17895,11 +17895,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -17913,13 +17913,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -17930,11 +17930,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -17948,13 +17948,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -17965,11 +17965,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -17983,13 +17983,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18001,12 +18001,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18019,12 +18019,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18038,13 +18038,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18056,12 +18056,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18074,12 +18074,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18093,13 +18093,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -18111,11 +18111,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -18130,13 +18130,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18149,12 +18149,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18167,12 +18167,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -18187,13 +18187,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -18206,11 +18206,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -18225,12 +18225,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -18241,10 +18241,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -18255,10 +18255,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MULL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -18269,13 +18269,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -18285,10 +18285,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -18299,10 +18299,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -18315,12 +18315,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -18332,11 +18332,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -18348,11 +18348,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -18363,11 +18363,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -18381,13 +18381,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -18398,11 +18398,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -18416,13 +18416,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -18433,11 +18433,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -18451,13 +18451,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18469,12 +18469,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18487,12 +18487,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18506,13 +18506,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18524,12 +18524,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18542,12 +18542,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18561,13 +18561,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -18579,11 +18579,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -18598,13 +18598,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18617,12 +18617,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18635,12 +18635,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -18655,13 +18655,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -18674,11 +18674,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18693,12 +18693,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -18709,10 +18709,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -18723,10 +18723,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -18737,13 +18737,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -18753,10 +18753,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -18767,10 +18767,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -18783,12 +18783,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -18800,11 +18800,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -18816,11 +18816,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -18831,11 +18831,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -18849,13 +18849,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -18866,11 +18866,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -18884,13 +18884,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -18901,11 +18901,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -18919,13 +18919,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18937,12 +18937,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18955,12 +18955,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18974,13 +18974,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -18992,12 +18992,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19010,12 +19010,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19029,13 +19029,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -19047,11 +19047,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -19066,13 +19066,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19085,12 +19085,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19103,12 +19103,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -19123,13 +19123,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -19142,11 +19142,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19161,12 +19161,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -19177,10 +19177,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -19191,10 +19191,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -19205,13 +19205,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -19221,10 +19221,10 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -19235,10 +19235,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -19251,12 +19251,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -19268,11 +19268,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -19284,11 +19284,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -19299,11 +19299,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -19317,13 +19317,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -19334,11 +19334,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -19352,13 +19352,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -19369,11 +19369,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -19387,13 +19387,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19405,12 +19405,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19423,12 +19423,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19442,13 +19442,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19460,12 +19460,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19478,12 +19478,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19497,13 +19497,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -19515,11 +19515,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -19534,13 +19534,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19553,12 +19553,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19571,12 +19571,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -19591,13 +19591,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -19610,11 +19610,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19629,12 +19629,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -19645,10 +19645,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -19659,10 +19659,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -19673,13 +19673,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -19689,10 +19689,10 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -19703,10 +19703,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -19719,12 +19719,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -19736,11 +19736,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -19752,11 +19752,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -19767,11 +19767,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -19785,13 +19785,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -19802,11 +19802,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -19820,13 +19820,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -19837,11 +19837,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -19855,13 +19855,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19873,12 +19873,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19891,12 +19891,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19910,13 +19910,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19928,12 +19928,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19946,12 +19946,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -19965,13 +19965,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -19983,11 +19983,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -20002,13 +20002,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20021,12 +20021,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20039,12 +20039,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -20059,13 +20059,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -20078,11 +20078,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20097,12 +20097,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -20113,10 +20113,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -20127,10 +20127,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -20141,13 +20141,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -20157,10 +20157,10 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -20171,10 +20171,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -20187,12 +20187,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -20204,11 +20204,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -20220,11 +20220,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -20235,11 +20235,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -20253,13 +20253,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -20270,11 +20270,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -20288,13 +20288,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -20305,11 +20305,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -20323,13 +20323,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20341,12 +20341,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20359,12 +20359,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20378,13 +20378,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20396,12 +20396,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20414,12 +20414,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20433,13 +20433,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -20451,11 +20451,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -20470,13 +20470,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20489,12 +20489,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20507,12 +20507,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -20527,13 +20527,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -20546,11 +20546,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20565,12 +20565,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -20581,10 +20581,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -20595,10 +20595,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -20609,13 +20609,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -20625,10 +20625,10 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -20639,10 +20639,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -20655,12 +20655,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -20672,11 +20672,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -20688,11 +20688,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -20703,11 +20703,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20721,13 +20721,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -20738,11 +20738,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20756,13 +20756,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -20773,11 +20773,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20791,13 +20791,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20809,12 +20809,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20827,12 +20827,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20846,13 +20846,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20864,12 +20864,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20882,12 +20882,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20901,13 +20901,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -20919,11 +20919,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20938,13 +20938,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20957,12 +20957,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -20975,12 +20975,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -20995,13 +20995,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -21014,11 +21014,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21033,12 +21033,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -21049,10 +21049,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -21063,10 +21063,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -21077,13 +21077,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -21093,10 +21093,10 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -21107,10 +21107,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -21123,12 +21123,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -21140,11 +21140,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -21156,11 +21156,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -21171,11 +21171,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -21189,13 +21189,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -21206,11 +21206,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -21224,13 +21224,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -21241,11 +21241,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -21259,13 +21259,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21277,12 +21277,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21295,12 +21295,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21314,13 +21314,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21332,12 +21332,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21350,12 +21350,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21369,13 +21369,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -21387,11 +21387,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -21406,13 +21406,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21425,12 +21425,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21443,12 +21443,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -21463,13 +21463,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -21482,11 +21482,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21501,12 +21501,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -21517,10 +21517,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -21531,10 +21531,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -21545,13 +21545,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21561,10 +21561,10 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21575,10 +21575,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -21591,12 +21591,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -21608,11 +21608,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -21624,11 +21624,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -21639,11 +21639,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21657,13 +21657,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -21674,11 +21674,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21692,13 +21692,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -21709,11 +21709,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21727,13 +21727,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21745,12 +21745,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21763,12 +21763,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21782,13 +21782,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21800,12 +21800,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21818,12 +21818,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21837,13 +21837,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -21855,11 +21855,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21874,13 +21874,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21893,12 +21893,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21911,12 +21911,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -21931,13 +21931,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -21950,11 +21950,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -21969,12 +21969,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21985,10 +21985,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -21999,10 +21999,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -22013,13 +22013,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -22029,10 +22029,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -22043,10 +22043,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -22059,12 +22059,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -22076,11 +22076,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -22092,11 +22092,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -22107,11 +22107,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -22125,13 +22125,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -22142,11 +22142,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -22160,13 +22160,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -22177,11 +22177,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -22195,13 +22195,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22213,12 +22213,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22231,12 +22231,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22250,13 +22250,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22268,12 +22268,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22286,12 +22286,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22305,13 +22305,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -22323,11 +22323,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -22342,13 +22342,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22361,12 +22361,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22379,12 +22379,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -22399,13 +22399,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -22418,11 +22418,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22437,12 +22437,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -22453,10 +22453,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -22467,10 +22467,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -22481,13 +22481,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -22497,10 +22497,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -22511,10 +22511,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -22527,12 +22527,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -22544,11 +22544,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -22560,11 +22560,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -22575,11 +22575,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22593,13 +22593,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -22610,11 +22610,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22628,13 +22628,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -22645,11 +22645,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22663,13 +22663,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22681,12 +22681,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22699,12 +22699,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22718,13 +22718,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22736,12 +22736,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22754,12 +22754,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22773,13 +22773,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -22791,11 +22791,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22810,13 +22810,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22829,12 +22829,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22847,12 +22847,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -22867,13 +22867,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -22886,11 +22886,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -22905,12 +22905,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -22921,10 +22921,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -22935,10 +22935,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -22949,13 +22949,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -22965,10 +22965,10 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -22979,10 +22979,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -22995,12 +22995,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -23012,11 +23012,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -23028,11 +23028,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -23043,11 +23043,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -23061,13 +23061,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -23078,11 +23078,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -23096,13 +23096,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -23113,11 +23113,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -23131,13 +23131,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23149,12 +23149,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23167,12 +23167,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23186,13 +23186,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23204,12 +23204,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23222,12 +23222,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23241,13 +23241,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -23259,11 +23259,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -23278,13 +23278,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23297,12 +23297,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23315,12 +23315,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -23335,13 +23335,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -23354,11 +23354,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23373,12 +23373,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -23389,10 +23389,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -23403,10 +23403,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -23417,13 +23417,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -23433,10 +23433,10 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -23447,10 +23447,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -23463,12 +23463,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -23480,11 +23480,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -23496,11 +23496,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -23511,11 +23511,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -23529,13 +23529,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -23546,11 +23546,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -23564,13 +23564,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -23581,11 +23581,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -23599,13 +23599,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23617,12 +23617,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23635,12 +23635,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23654,13 +23654,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23672,12 +23672,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23690,12 +23690,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23709,13 +23709,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -23727,11 +23727,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -23746,13 +23746,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23765,12 +23765,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23783,12 +23783,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -23803,13 +23803,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -23822,11 +23822,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -23841,12 +23841,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -23857,10 +23857,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -23871,10 +23871,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -23885,13 +23885,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -23901,10 +23901,10 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -23915,10 +23915,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -23931,12 +23931,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -23948,11 +23948,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -23964,11 +23964,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -23979,11 +23979,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -23997,13 +23997,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -24014,11 +24014,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -24032,13 +24032,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -24049,11 +24049,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -24067,13 +24067,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24085,12 +24085,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24103,12 +24103,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24122,13 +24122,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24140,12 +24140,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24158,12 +24158,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24177,13 +24177,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -24195,11 +24195,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -24214,13 +24214,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24233,12 +24233,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24251,12 +24251,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -24271,13 +24271,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -24290,11 +24290,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24309,12 +24309,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -24325,10 +24325,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -24339,10 +24339,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -24353,13 +24353,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -24369,10 +24369,10 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -24383,10 +24383,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -24399,12 +24399,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -24416,11 +24416,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -24432,11 +24432,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -24447,11 +24447,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -24465,13 +24465,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -24482,11 +24482,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -24500,13 +24500,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -24517,11 +24517,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -24535,13 +24535,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24553,12 +24553,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24571,12 +24571,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24590,13 +24590,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24608,12 +24608,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24626,12 +24626,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24645,13 +24645,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -24663,11 +24663,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -24682,13 +24682,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24701,12 +24701,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24719,12 +24719,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -24739,13 +24739,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -24758,11 +24758,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -24777,12 +24777,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -24793,10 +24793,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -24807,10 +24807,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -24821,13 +24821,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -24837,10 +24837,10 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -24851,10 +24851,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -24867,12 +24867,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -24884,11 +24884,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -24900,11 +24900,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -24915,11 +24915,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -24933,13 +24933,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -24950,11 +24950,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -24968,13 +24968,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -24985,11 +24985,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -25003,13 +25003,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25021,12 +25021,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25039,12 +25039,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25058,13 +25058,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25076,12 +25076,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25094,12 +25094,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25113,13 +25113,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -25131,11 +25131,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -25150,13 +25150,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25169,12 +25169,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25187,12 +25187,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -25207,13 +25207,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -25226,11 +25226,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25245,12 +25245,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -25261,10 +25261,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -25275,10 +25275,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -25289,13 +25289,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -25305,10 +25305,10 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -25319,10 +25319,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -25335,12 +25335,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -25352,11 +25352,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -25368,11 +25368,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -25383,11 +25383,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -25401,13 +25401,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -25418,11 +25418,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -25436,13 +25436,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -25453,11 +25453,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -25471,13 +25471,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25489,12 +25489,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25507,12 +25507,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25526,13 +25526,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25544,12 +25544,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25562,12 +25562,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25581,13 +25581,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -25599,11 +25599,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -25618,13 +25618,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25637,12 +25637,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25655,12 +25655,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -25675,13 +25675,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -25694,11 +25694,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -25713,12 +25713,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -25729,10 +25729,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -25743,10 +25743,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C16__NEON_MLAL_PADAL, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -25757,13 +25757,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -25773,10 +25773,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -25787,10 +25787,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -25803,12 +25803,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -25820,11 +25820,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -25836,11 +25836,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -25851,11 +25851,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -25869,13 +25869,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -25886,11 +25886,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -25904,13 +25904,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -25921,11 +25921,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -25939,13 +25939,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25957,12 +25957,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25975,12 +25975,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -25994,13 +25994,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26012,12 +26012,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26030,12 +26030,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26049,13 +26049,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -26067,11 +26067,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -26086,13 +26086,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26105,12 +26105,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26123,12 +26123,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -26143,13 +26143,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -26162,11 +26162,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26181,12 +26181,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -26197,10 +26197,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -26211,10 +26211,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -26225,13 +26225,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -26241,10 +26241,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -26255,10 +26255,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -26271,12 +26271,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -26288,11 +26288,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -26304,11 +26304,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -26319,11 +26319,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -26337,13 +26337,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -26354,11 +26354,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -26372,13 +26372,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -26389,11 +26389,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -26407,13 +26407,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26425,12 +26425,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26443,12 +26443,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26462,13 +26462,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26480,12 +26480,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26498,12 +26498,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26517,13 +26517,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -26535,11 +26535,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -26554,13 +26554,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26573,12 +26573,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26591,12 +26591,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -26611,13 +26611,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -26630,11 +26630,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26649,12 +26649,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -26665,10 +26665,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -26679,10 +26679,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -26693,13 +26693,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -26709,10 +26709,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -26723,10 +26723,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -26739,12 +26739,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -26756,11 +26756,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -26772,11 +26772,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -26787,11 +26787,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -26805,13 +26805,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -26822,11 +26822,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -26840,13 +26840,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -26857,11 +26857,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -26875,13 +26875,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26893,12 +26893,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26911,12 +26911,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26930,13 +26930,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26948,12 +26948,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26966,12 +26966,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -26985,13 +26985,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -27003,11 +27003,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -27022,13 +27022,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27041,12 +27041,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27059,12 +27059,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -27079,13 +27079,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -27098,11 +27098,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27117,12 +27117,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -27133,10 +27133,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -27147,10 +27147,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -27161,13 +27161,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -27177,10 +27177,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -27191,10 +27191,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -27207,12 +27207,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -27224,11 +27224,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -27240,11 +27240,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -27255,11 +27255,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -27273,13 +27273,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -27290,11 +27290,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -27308,13 +27308,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -27325,11 +27325,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -27343,13 +27343,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27361,12 +27361,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27379,12 +27379,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27398,13 +27398,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27416,12 +27416,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27434,12 +27434,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27453,13 +27453,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -27471,11 +27471,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -27490,13 +27490,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27509,12 +27509,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27527,12 +27527,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -27547,13 +27547,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -27566,11 +27566,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27585,12 +27585,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -27601,10 +27601,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -27615,10 +27615,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -27629,13 +27629,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -27645,10 +27645,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -27659,10 +27659,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -27675,12 +27675,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -27692,11 +27692,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -27708,11 +27708,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -27723,11 +27723,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -27741,13 +27741,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -27758,11 +27758,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -27776,13 +27776,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -27793,11 +27793,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -27811,13 +27811,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27829,12 +27829,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27847,12 +27847,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27866,13 +27866,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27884,12 +27884,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27902,12 +27902,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27921,13 +27921,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -27939,11 +27939,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -27958,13 +27958,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27977,12 +27977,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -27995,12 +27995,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -28015,13 +28015,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -28034,11 +28034,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28053,12 +28053,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -28069,10 +28069,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -28083,10 +28083,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -28097,13 +28097,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -28113,10 +28113,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -28127,10 +28127,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -28143,12 +28143,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -28160,11 +28160,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -28176,11 +28176,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -28191,11 +28191,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -28209,13 +28209,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -28226,11 +28226,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -28244,13 +28244,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -28261,11 +28261,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -28279,13 +28279,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28297,12 +28297,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28315,12 +28315,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28334,13 +28334,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28352,12 +28352,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28370,12 +28370,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28389,13 +28389,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -28407,11 +28407,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -28426,13 +28426,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28445,12 +28445,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28463,12 +28463,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -28483,13 +28483,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -28502,11 +28502,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28521,12 +28521,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -28537,10 +28537,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -28551,10 +28551,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -28565,13 +28565,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -28581,10 +28581,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -28595,10 +28595,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -28611,12 +28611,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -28628,11 +28628,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -28644,11 +28644,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -28659,11 +28659,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -28677,13 +28677,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -28694,11 +28694,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -28712,13 +28712,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -28729,11 +28729,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -28747,13 +28747,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28765,12 +28765,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28783,12 +28783,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28802,13 +28802,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28820,12 +28820,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28838,12 +28838,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28857,13 +28857,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -28875,11 +28875,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -28894,13 +28894,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28913,12 +28913,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28931,12 +28931,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -28951,13 +28951,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -28970,11 +28970,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -28989,12 +28989,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -29005,10 +29005,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -29019,10 +29019,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -29033,13 +29033,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -29049,10 +29049,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -29063,10 +29063,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -29079,12 +29079,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -29096,11 +29096,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -29112,11 +29112,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -29127,11 +29127,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -29145,13 +29145,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -29162,11 +29162,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -29180,13 +29180,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -29197,11 +29197,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -29215,13 +29215,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29233,12 +29233,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29251,12 +29251,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29270,13 +29270,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29288,12 +29288,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29306,12 +29306,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29325,13 +29325,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -29343,11 +29343,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -29362,13 +29362,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29381,12 +29381,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29399,12 +29399,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -29419,13 +29419,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -29438,11 +29438,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -29457,12 +29457,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -29473,10 +29473,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -29487,10 +29487,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -29501,13 +29501,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -29517,10 +29517,10 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -29531,10 +29531,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -29547,12 +29547,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -29564,11 +29564,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -29580,11 +29580,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -29595,11 +29595,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -29613,13 +29613,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -29630,11 +29630,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -29648,13 +29648,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -29665,11 +29665,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -29683,13 +29683,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29701,12 +29701,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29719,12 +29719,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29738,13 +29738,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29756,12 +29756,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29774,12 +29774,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29793,13 +29793,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -29811,11 +29811,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -29830,13 +29830,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29849,12 +29849,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29867,12 +29867,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -29887,13 +29887,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -29906,11 +29906,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -29925,12 +29925,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -29941,10 +29941,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -29955,10 +29955,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -29969,13 +29969,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -29985,10 +29985,10 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -29999,10 +29999,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -30015,12 +30015,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -30032,11 +30032,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -30048,11 +30048,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -30063,11 +30063,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -30081,13 +30081,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -30098,11 +30098,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -30116,13 +30116,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -30133,11 +30133,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -30151,13 +30151,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30169,12 +30169,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30187,12 +30187,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30206,13 +30206,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30224,12 +30224,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30242,12 +30242,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30261,13 +30261,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -30279,11 +30279,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -30298,13 +30298,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30317,12 +30317,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30335,12 +30335,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -30355,13 +30355,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -30374,11 +30374,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30393,12 +30393,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -30409,10 +30409,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -30423,10 +30423,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -30437,13 +30437,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -30453,10 +30453,10 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -30467,10 +30467,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -30483,12 +30483,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -30500,11 +30500,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -30516,11 +30516,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -30531,11 +30531,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -30549,13 +30549,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -30566,11 +30566,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -30584,13 +30584,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -30601,11 +30601,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -30619,13 +30619,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30637,12 +30637,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30655,12 +30655,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30674,13 +30674,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30692,12 +30692,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30710,12 +30710,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30729,13 +30729,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -30747,11 +30747,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -30766,13 +30766,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30785,12 +30785,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30803,12 +30803,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -30823,13 +30823,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -30842,11 +30842,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -30861,12 +30861,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -30877,10 +30877,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -30891,10 +30891,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -30905,13 +30905,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -30921,10 +30921,10 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -30935,10 +30935,10 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -30951,12 +30951,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -30968,11 +30968,11 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -30984,11 +30984,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -30999,11 +30999,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -31017,13 +31017,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -31034,11 +31034,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -31052,13 +31052,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -31069,11 +31069,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -31087,13 +31087,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31105,12 +31105,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31123,12 +31123,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31142,13 +31142,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31160,12 +31160,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31178,12 +31178,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31197,13 +31197,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -31215,11 +31215,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -31234,13 +31234,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31253,12 +31253,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31271,12 +31271,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -31291,13 +31291,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -31310,11 +31310,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31329,12 +31329,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -31345,10 +31345,10 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -31359,10 +31359,10 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -31373,13 +31373,13 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -31389,10 +31389,10 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -31403,10 +31403,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -31419,12 +31419,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -31436,11 +31436,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -31452,11 +31452,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -31467,11 +31467,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -31485,13 +31485,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -31502,11 +31502,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -31520,13 +31520,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -31537,11 +31537,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -31555,13 +31555,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31573,12 +31573,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31591,12 +31591,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31610,13 +31610,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31628,12 +31628,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31646,12 +31646,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31665,13 +31665,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -31683,11 +31683,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -31702,13 +31702,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31721,12 +31721,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31739,12 +31739,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -31759,13 +31759,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -31778,11 +31778,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -31797,12 +31797,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -31813,10 +31813,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -31827,10 +31827,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -31841,13 +31841,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -31857,10 +31857,10 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -31871,10 +31871,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -31887,12 +31887,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -31904,11 +31904,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -31920,11 +31920,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -31935,11 +31935,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -31953,13 +31953,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -31970,11 +31970,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -31988,13 +31988,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -32005,11 +32005,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -32023,13 +32023,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32041,12 +32041,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32059,12 +32059,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32078,13 +32078,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32096,12 +32096,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32114,12 +32114,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32133,13 +32133,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -32151,11 +32151,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -32170,13 +32170,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32189,12 +32189,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32207,12 +32207,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -32227,13 +32227,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -32246,11 +32246,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32265,12 +32265,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -32281,10 +32281,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -32295,10 +32295,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -32309,13 +32309,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -32325,10 +32325,10 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -32339,10 +32339,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -32355,12 +32355,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -32372,11 +32372,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -32388,11 +32388,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -32403,11 +32403,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -32421,13 +32421,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -32438,11 +32438,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -32456,13 +32456,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -32473,11 +32473,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -32491,13 +32491,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32509,12 +32509,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32527,12 +32527,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32546,13 +32546,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32564,12 +32564,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32582,12 +32582,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32601,13 +32601,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -32619,11 +32619,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -32638,13 +32638,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32657,12 +32657,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32675,12 +32675,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -32695,13 +32695,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -32714,11 +32714,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32733,12 +32733,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -32749,10 +32749,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -32763,10 +32763,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -32777,13 +32777,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -32793,10 +32793,10 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -32807,10 +32807,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -32823,12 +32823,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -32840,11 +32840,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -32856,11 +32856,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -32871,11 +32871,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -32889,13 +32889,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -32906,11 +32906,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -32924,13 +32924,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -32941,11 +32941,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -32959,13 +32959,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32977,12 +32977,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -32995,12 +32995,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33014,13 +33014,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33032,12 +33032,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33050,12 +33050,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33069,13 +33069,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -33087,11 +33087,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -33106,13 +33106,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33125,12 +33125,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33143,12 +33143,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -33163,13 +33163,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -33182,11 +33182,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -33201,12 +33201,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -33217,10 +33217,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -33231,10 +33231,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -33245,13 +33245,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -33261,10 +33261,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -33275,10 +33275,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -33291,12 +33291,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -33308,11 +33308,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -33324,11 +33324,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -33339,11 +33339,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -33357,13 +33357,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -33374,11 +33374,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -33392,13 +33392,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -33409,11 +33409,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -33427,13 +33427,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33445,12 +33445,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33463,12 +33463,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33482,13 +33482,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33500,12 +33500,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33518,12 +33518,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33537,13 +33537,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -33555,11 +33555,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -33574,13 +33574,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33593,12 +33593,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33611,12 +33611,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -33631,13 +33631,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -33650,11 +33650,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33669,12 +33669,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -33685,10 +33685,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -33699,10 +33699,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -33713,13 +33713,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -33729,10 +33729,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -33743,10 +33743,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -33759,12 +33759,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -33776,11 +33776,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -33792,11 +33792,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -33807,11 +33807,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -33825,13 +33825,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -33842,11 +33842,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -33860,13 +33860,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -33877,11 +33877,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -33895,13 +33895,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33913,12 +33913,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33931,12 +33931,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33950,13 +33950,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33968,12 +33968,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -33986,12 +33986,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34005,13 +34005,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -34023,11 +34023,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -34042,13 +34042,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34061,12 +34061,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34079,12 +34079,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -34099,13 +34099,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -34118,11 +34118,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34137,12 +34137,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -34153,10 +34153,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -34167,10 +34167,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -34181,13 +34181,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -34197,10 +34197,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -34211,10 +34211,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -34227,12 +34227,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -34244,11 +34244,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -34260,11 +34260,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -34275,11 +34275,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -34293,13 +34293,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -34310,11 +34310,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -34328,13 +34328,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -34345,11 +34345,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -34363,13 +34363,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34381,12 +34381,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34399,12 +34399,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34418,13 +34418,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34436,12 +34436,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34454,12 +34454,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34473,13 +34473,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -34491,11 +34491,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -34510,13 +34510,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34529,12 +34529,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34547,12 +34547,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -34567,13 +34567,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -34586,11 +34586,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34605,12 +34605,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -34621,10 +34621,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -34635,10 +34635,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -34649,13 +34649,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -34665,10 +34665,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -34679,10 +34679,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -34695,12 +34695,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -34712,11 +34712,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -34728,11 +34728,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -34743,11 +34743,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -34761,13 +34761,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -34778,11 +34778,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -34796,13 +34796,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -34813,11 +34813,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -34831,13 +34831,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34849,12 +34849,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34867,12 +34867,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34886,13 +34886,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34904,12 +34904,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34922,12 +34922,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34941,13 +34941,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -34959,11 +34959,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -34978,13 +34978,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -34997,12 +34997,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35015,12 +35015,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -35035,13 +35035,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -35054,11 +35054,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35073,12 +35073,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -35089,10 +35089,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -35103,10 +35103,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -35117,13 +35117,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -35133,10 +35133,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -35147,10 +35147,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -35163,12 +35163,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -35180,11 +35180,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -35196,11 +35196,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -35211,11 +35211,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -35229,13 +35229,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -35246,11 +35246,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -35264,13 +35264,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -35281,11 +35281,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -35299,13 +35299,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35317,12 +35317,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35335,12 +35335,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35354,13 +35354,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35372,12 +35372,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35390,12 +35390,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35409,13 +35409,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -35427,11 +35427,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -35446,13 +35446,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35465,12 +35465,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35483,12 +35483,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -35503,13 +35503,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -35522,11 +35522,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35541,12 +35541,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -35557,10 +35557,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -35571,10 +35571,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(1)
@@ -35585,13 +35585,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -35601,10 +35601,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -35615,10 +35615,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -35631,12 +35631,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -35648,11 +35648,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -35664,11 +35664,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -35679,11 +35679,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -35697,13 +35697,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -35714,11 +35714,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -35732,13 +35732,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -35749,11 +35749,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -35767,13 +35767,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35785,12 +35785,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35803,12 +35803,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35822,13 +35822,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35840,12 +35840,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35858,12 +35858,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35877,13 +35877,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -35895,11 +35895,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -35914,13 +35914,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35933,12 +35933,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -35951,12 +35951,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -35971,13 +35971,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -35990,11 +35990,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36009,12 +36009,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -36025,10 +36025,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -36039,10 +36039,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(2)
@@ -36053,13 +36053,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -36069,10 +36069,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -36083,10 +36083,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -36099,12 +36099,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -36116,11 +36116,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -36132,11 +36132,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -36147,11 +36147,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -36165,13 +36165,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -36182,11 +36182,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -36200,13 +36200,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -36217,11 +36217,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -36235,13 +36235,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36253,12 +36253,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36271,12 +36271,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36290,13 +36290,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36308,12 +36308,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36326,12 +36326,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36345,13 +36345,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -36363,11 +36363,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -36382,13 +36382,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36401,12 +36401,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36419,12 +36419,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -36439,13 +36439,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -36458,11 +36458,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36477,12 +36477,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -36493,10 +36493,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -36507,10 +36507,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(3)
@@ -36521,13 +36521,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -36537,10 +36537,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -36551,10 +36551,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -36567,12 +36567,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -36584,11 +36584,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -36600,11 +36600,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_lt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -36615,11 +36615,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36633,13 +36633,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_gt_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -36650,11 +36650,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36668,13 +36668,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_div_8) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -36685,11 +36685,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36703,13 +36703,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_gt_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36721,12 +36721,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36739,12 +36739,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36758,13 +36758,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_div_16) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36776,12 +36776,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36794,12 +36794,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36813,13 +36813,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -36831,11 +36831,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36850,13 +36850,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36869,12 +36869,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36887,12 +36887,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -36907,13 +36907,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, a_offset) {
     TEST_REQUIRES_ARM_NEON;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -36926,11 +36926,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, zero) {
     TEST_REQUIRES_ARM_NEON;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -36945,12 +36945,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, qmin) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -36961,10 +36961,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, qmax) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -36975,10 +36975,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16__NEON_MULL_ADDW_DUP, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cm) {
     TEST_REQUIRES_ARM_NEON;
     GemmMicrokernelTester()
       .mr(4)
@@ -36989,13 +36989,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -37005,10 +37005,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -37019,10 +37019,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -37035,12 +37035,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -37052,11 +37052,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -37068,11 +37068,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -37083,11 +37083,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -37101,13 +37101,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -37118,11 +37118,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -37136,13 +37136,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -37153,11 +37153,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -37171,13 +37171,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37189,12 +37189,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37207,12 +37207,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37226,13 +37226,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37244,12 +37244,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37262,12 +37262,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37281,13 +37281,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -37299,11 +37299,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -37318,13 +37318,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37337,12 +37337,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37355,12 +37355,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -37375,13 +37375,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -37394,11 +37394,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37413,12 +37413,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -37429,10 +37429,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -37443,10 +37443,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C4__NEONDOT, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -37457,13 +37457,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -37473,10 +37473,10 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -37487,10 +37487,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -37503,12 +37503,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -37520,11 +37520,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -37536,11 +37536,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -37551,11 +37551,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -37569,13 +37569,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -37586,11 +37586,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -37604,13 +37604,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -37621,11 +37621,11 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -37639,13 +37639,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37657,12 +37657,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37675,12 +37675,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37694,13 +37694,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37712,12 +37712,12 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37730,12 +37730,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37749,13 +37749,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -37767,11 +37767,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -37786,13 +37786,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37805,12 +37805,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37823,12 +37823,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -37843,13 +37843,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -37862,11 +37862,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -37881,12 +37881,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -37897,10 +37897,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -37911,10 +37911,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X8C4__NEONDOT, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -37925,13 +37925,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -37941,10 +37941,10 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -37955,10 +37955,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -37971,12 +37971,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -37988,11 +37988,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -38004,11 +38004,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -38019,11 +38019,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -38037,13 +38037,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38054,11 +38054,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -38072,13 +38072,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -38089,11 +38089,11 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -38107,13 +38107,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38125,12 +38125,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38143,12 +38143,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38162,13 +38162,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38180,12 +38180,12 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38198,12 +38198,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38217,13 +38217,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -38235,11 +38235,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -38254,13 +38254,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38273,12 +38273,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38291,12 +38291,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -38311,13 +38311,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -38330,11 +38330,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 6; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38349,12 +38349,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -38365,10 +38365,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -38379,10 +38379,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X8C4__NEONDOT, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -38393,13 +38393,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -38409,10 +38409,10 @@
       .m(8)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -38423,10 +38423,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 8; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -38439,12 +38439,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 8; m++) {
       GemmMicrokernelTester()
@@ -38456,11 +38456,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -38472,11 +38472,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -38487,11 +38487,11 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -38505,13 +38505,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38522,11 +38522,11 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -38540,13 +38540,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -38557,11 +38557,11 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -38575,13 +38575,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38593,12 +38593,12 @@
           .m(8)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_gt_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38611,12 +38611,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38630,13 +38630,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38648,12 +38648,12 @@
           .m(8)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_div_8_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38666,12 +38666,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38685,13 +38685,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -38703,11 +38703,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -38722,13 +38722,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_gt_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38741,12 +38741,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, n_div_8_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38759,12 +38759,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -38779,13 +38779,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -38798,11 +38798,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 8; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -38817,12 +38817,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -38833,10 +38833,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -38847,10 +38847,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X8C4__NEONDOT, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -38861,13 +38861,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -38877,10 +38877,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -38891,10 +38891,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -38907,12 +38907,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -38924,11 +38924,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -38940,11 +38940,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -38955,11 +38955,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -38973,13 +38973,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -38990,11 +38990,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -39008,13 +39008,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -39025,11 +39025,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -39043,13 +39043,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39061,12 +39061,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39079,12 +39079,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39098,13 +39098,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39116,12 +39116,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39134,12 +39134,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39153,13 +39153,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -39171,11 +39171,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -39190,13 +39190,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39209,12 +39209,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39227,12 +39227,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -39247,13 +39247,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -39266,11 +39266,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39285,12 +39285,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -39301,10 +39301,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -39315,10 +39315,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C4__NEONDOT, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(1)
@@ -39329,13 +39329,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -39345,10 +39345,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -39359,10 +39359,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -39375,12 +39375,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -39392,11 +39392,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -39408,11 +39408,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -39423,11 +39423,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -39441,13 +39441,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -39458,11 +39458,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -39476,13 +39476,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -39493,11 +39493,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -39511,13 +39511,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39529,12 +39529,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39547,12 +39547,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39566,13 +39566,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39584,12 +39584,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39602,12 +39602,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39621,13 +39621,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -39639,11 +39639,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -39658,13 +39658,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39677,12 +39677,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39695,12 +39695,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -39715,13 +39715,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -39734,11 +39734,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39753,12 +39753,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -39769,10 +39769,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -39783,10 +39783,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__NEONDOT, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -39797,13 +39797,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -39813,10 +39813,10 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -39827,10 +39827,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 6; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -39843,12 +39843,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 6; m++) {
       GemmMicrokernelTester()
@@ -39860,11 +39860,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -39876,11 +39876,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -39891,11 +39891,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -39909,13 +39909,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -39926,11 +39926,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -39944,13 +39944,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -39961,11 +39961,11 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -39979,13 +39979,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -39997,12 +39997,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40015,12 +40015,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40034,13 +40034,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40052,12 +40052,12 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40070,12 +40070,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40089,13 +40089,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -40107,11 +40107,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -40126,13 +40126,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40145,12 +40145,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40163,12 +40163,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 6; m++) {
@@ -40183,13 +40183,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -40202,11 +40202,11 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 6; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40221,12 +40221,12 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -40237,10 +40237,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -40251,10 +40251,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_6X16C4__NEONDOT, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(6)
@@ -40265,13 +40265,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -40281,10 +40281,10 @@
       .m(8)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -40295,10 +40295,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 8; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -40311,12 +40311,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 8; m++) {
       GemmMicrokernelTester()
@@ -40328,11 +40328,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -40344,11 +40344,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -40359,11 +40359,11 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -40377,13 +40377,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -40394,11 +40394,11 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -40412,13 +40412,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -40429,11 +40429,11 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -40447,13 +40447,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40465,12 +40465,12 @@
           .m(8)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40483,12 +40483,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40502,13 +40502,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40520,12 +40520,12 @@
           .m(8)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40538,12 +40538,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40557,13 +40557,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -40575,11 +40575,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -40594,13 +40594,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40613,12 +40613,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40631,12 +40631,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 8; m++) {
@@ -40651,13 +40651,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -40670,11 +40670,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 8; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40689,12 +40689,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -40705,10 +40705,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -40719,10 +40719,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_8X16C4__NEONDOT, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(8)
@@ -40733,13 +40733,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -40749,10 +40749,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -40763,10 +40763,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -40779,12 +40779,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -40796,11 +40796,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -40812,11 +40812,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -40827,11 +40827,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -40845,13 +40845,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -40862,11 +40862,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -40880,13 +40880,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_div_8) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -40897,11 +40897,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_div_8_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -40915,13 +40915,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40933,12 +40933,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40951,12 +40951,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40970,13 +40970,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -40988,12 +40988,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41006,12 +41006,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41025,13 +41025,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -41043,11 +41043,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41062,13 +41062,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41081,12 +41081,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41099,12 +41099,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41119,13 +41119,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -41138,11 +41138,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41157,12 +41157,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41173,10 +41173,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41187,10 +41187,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41201,13 +41201,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_ARM64
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41217,10 +41217,10 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41231,10 +41231,10 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -41247,12 +41247,12 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -41264,11 +41264,11 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -41280,11 +41280,11 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 16; k++) {
       GemmMicrokernelTester()
@@ -41295,11 +41295,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41313,13 +41313,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 17; k < 32; k++) {
       GemmMicrokernelTester()
@@ -41330,11 +41330,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 17; k < 32; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41348,13 +41348,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 32; k <= 160; k += 16) {
       GemmMicrokernelTester()
@@ -41365,11 +41365,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 32; k <= 160; k += 16) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41383,13 +41383,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41401,12 +41401,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41419,12 +41419,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41438,13 +41438,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41456,12 +41456,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41474,12 +41474,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41493,13 +41493,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -41511,11 +41511,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41530,13 +41530,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41549,12 +41549,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41567,12 +41567,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 80; k += 17) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -41587,13 +41587,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (size_t k = 1; k <= 80; k += 17) {
       GemmMicrokernelTester()
@@ -41606,11 +41606,11 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
     TEST_REQUIRES_ARM_NEON_DOT;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 80; k += 17) {
@@ -41625,12 +41625,12 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41641,10 +41641,10 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41655,10 +41655,10 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
     TEST_REQUIRES_ARM_NEON_DOT;
     GemmMicrokernelTester()
       .mr(4)
@@ -41669,13 +41669,13 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_gemm_neon_params);
   }
 #endif  // XNN_ARCH_ARM64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -41685,10 +41685,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -41699,10 +41699,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -41715,12 +41715,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -41732,11 +41732,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -41748,11 +41748,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -41763,11 +41763,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -41781,13 +41781,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -41798,11 +41798,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -41816,13 +41816,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -41833,11 +41833,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -41851,13 +41851,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41869,12 +41869,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41887,12 +41887,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41906,13 +41906,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41924,12 +41924,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41942,12 +41942,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -41961,13 +41961,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -41979,11 +41979,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -41998,13 +41998,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42017,12 +42017,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42035,12 +42035,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -42055,13 +42055,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -42074,11 +42074,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42093,12 +42093,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42109,10 +42109,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42123,10 +42123,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -42137,13 +42137,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -42153,10 +42153,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -42167,10 +42167,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -42183,12 +42183,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -42200,11 +42200,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -42216,11 +42216,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -42231,11 +42231,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -42249,13 +42249,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -42266,11 +42266,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -42284,13 +42284,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -42301,11 +42301,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -42319,13 +42319,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42337,12 +42337,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42355,12 +42355,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42374,13 +42374,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42392,12 +42392,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42410,12 +42410,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42429,13 +42429,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -42447,11 +42447,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -42466,13 +42466,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42485,12 +42485,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42503,12 +42503,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -42523,13 +42523,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -42542,11 +42542,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42561,12 +42561,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -42577,10 +42577,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -42591,10 +42591,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -42605,13 +42605,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -42621,10 +42621,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -42635,10 +42635,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -42651,12 +42651,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -42668,11 +42668,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -42684,11 +42684,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -42699,11 +42699,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -42717,13 +42717,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -42734,11 +42734,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -42752,13 +42752,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -42769,11 +42769,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -42787,13 +42787,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42805,12 +42805,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42823,12 +42823,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42842,13 +42842,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42860,12 +42860,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42878,12 +42878,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42897,13 +42897,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -42915,11 +42915,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -42934,13 +42934,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42953,12 +42953,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -42971,12 +42971,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -42991,13 +42991,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -43010,11 +43010,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43029,12 +43029,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43045,10 +43045,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43059,10 +43059,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -43073,13 +43073,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -43089,10 +43089,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -43103,10 +43103,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -43119,12 +43119,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -43136,11 +43136,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -43152,11 +43152,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -43167,11 +43167,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -43185,13 +43185,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -43202,11 +43202,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -43220,13 +43220,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -43237,11 +43237,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -43255,13 +43255,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43273,12 +43273,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43291,12 +43291,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43310,13 +43310,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43328,12 +43328,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43346,12 +43346,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43365,13 +43365,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -43383,11 +43383,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -43402,13 +43402,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43421,12 +43421,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43439,12 +43439,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -43459,13 +43459,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -43478,11 +43478,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43497,12 +43497,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -43513,10 +43513,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -43527,10 +43527,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -43541,13 +43541,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -43557,10 +43557,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -43571,10 +43571,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -43587,12 +43587,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -43604,11 +43604,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -43620,11 +43620,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -43635,11 +43635,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -43653,13 +43653,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -43670,11 +43670,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -43688,13 +43688,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -43705,11 +43705,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -43723,13 +43723,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43741,12 +43741,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43759,12 +43759,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43778,13 +43778,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43796,12 +43796,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43814,12 +43814,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43833,13 +43833,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -43851,11 +43851,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -43870,13 +43870,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43889,12 +43889,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43907,12 +43907,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -43927,13 +43927,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -43946,11 +43946,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -43965,12 +43965,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -43981,10 +43981,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -43995,10 +43995,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -44009,13 +44009,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -44025,10 +44025,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -44039,10 +44039,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -44055,12 +44055,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -44072,11 +44072,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -44088,11 +44088,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -44103,11 +44103,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -44121,13 +44121,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -44138,11 +44138,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -44156,13 +44156,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -44173,11 +44173,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -44191,13 +44191,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44209,12 +44209,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44227,12 +44227,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44246,13 +44246,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44264,12 +44264,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44282,12 +44282,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44301,13 +44301,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -44319,11 +44319,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -44338,13 +44338,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44357,12 +44357,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44375,12 +44375,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -44395,13 +44395,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -44414,11 +44414,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44433,12 +44433,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -44449,10 +44449,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -44463,10 +44463,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -44477,13 +44477,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -44493,10 +44493,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -44507,10 +44507,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -44523,12 +44523,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -44540,11 +44540,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -44556,11 +44556,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -44571,11 +44571,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -44589,13 +44589,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -44606,11 +44606,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -44624,13 +44624,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -44641,11 +44641,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -44659,13 +44659,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44677,12 +44677,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44695,12 +44695,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44714,13 +44714,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44732,12 +44732,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44750,12 +44750,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44769,13 +44769,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -44787,11 +44787,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -44806,13 +44806,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44825,12 +44825,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44843,12 +44843,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -44863,13 +44863,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -44882,11 +44882,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -44901,12 +44901,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -44917,10 +44917,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -44931,10 +44931,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -44945,13 +44945,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -44961,10 +44961,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -44975,10 +44975,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -44991,12 +44991,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -45008,11 +45008,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -45024,11 +45024,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -45039,11 +45039,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -45057,13 +45057,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -45074,11 +45074,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -45092,13 +45092,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -45109,11 +45109,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -45127,13 +45127,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45145,12 +45145,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45163,12 +45163,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45182,13 +45182,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45200,12 +45200,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45218,12 +45218,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45237,13 +45237,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -45255,11 +45255,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -45274,13 +45274,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45293,12 +45293,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45311,12 +45311,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -45331,13 +45331,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -45350,11 +45350,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45369,12 +45369,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -45385,10 +45385,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -45399,10 +45399,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -45413,13 +45413,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -45429,10 +45429,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -45443,10 +45443,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -45459,12 +45459,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -45476,11 +45476,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -45492,11 +45492,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -45507,11 +45507,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -45525,13 +45525,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -45542,11 +45542,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -45560,13 +45560,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -45577,11 +45577,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -45595,13 +45595,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45613,12 +45613,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45631,12 +45631,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45650,13 +45650,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45668,12 +45668,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45686,12 +45686,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45705,13 +45705,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -45723,11 +45723,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -45742,13 +45742,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45761,12 +45761,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45779,12 +45779,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -45799,13 +45799,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -45818,11 +45818,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -45837,12 +45837,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -45853,10 +45853,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -45867,10 +45867,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -45881,13 +45881,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -45897,10 +45897,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -45911,10 +45911,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -45927,12 +45927,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -45944,11 +45944,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -45960,11 +45960,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -45975,11 +45975,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -45993,13 +45993,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -46010,11 +46010,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -46028,13 +46028,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -46045,11 +46045,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -46063,13 +46063,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46081,12 +46081,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46099,12 +46099,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46118,13 +46118,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46136,12 +46136,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46154,12 +46154,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46173,13 +46173,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -46191,11 +46191,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -46210,13 +46210,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46229,12 +46229,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46247,12 +46247,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -46267,13 +46267,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -46286,11 +46286,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46305,12 +46305,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46321,10 +46321,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46335,10 +46335,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -46349,13 +46349,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -46365,10 +46365,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -46379,10 +46379,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -46395,12 +46395,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -46412,11 +46412,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -46428,11 +46428,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -46443,11 +46443,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -46461,13 +46461,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -46478,11 +46478,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -46496,13 +46496,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -46513,11 +46513,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -46531,13 +46531,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46549,12 +46549,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46567,12 +46567,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46586,13 +46586,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46604,12 +46604,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46622,12 +46622,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46641,13 +46641,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -46659,11 +46659,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -46678,13 +46678,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46697,12 +46697,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46715,12 +46715,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -46735,13 +46735,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -46754,11 +46754,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -46773,12 +46773,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -46789,10 +46789,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -46803,10 +46803,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -46817,13 +46817,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -46833,10 +46833,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -46847,10 +46847,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -46863,12 +46863,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -46880,11 +46880,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -46896,11 +46896,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -46911,11 +46911,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -46929,13 +46929,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -46946,11 +46946,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -46964,13 +46964,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -46981,11 +46981,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -46999,13 +46999,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47017,12 +47017,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47035,12 +47035,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47054,13 +47054,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47072,12 +47072,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47090,12 +47090,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47109,13 +47109,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -47127,11 +47127,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -47146,13 +47146,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47165,12 +47165,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47183,12 +47183,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -47203,13 +47203,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -47222,11 +47222,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47241,12 +47241,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47257,10 +47257,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47271,10 +47271,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -47285,13 +47285,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -47301,10 +47301,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -47315,10 +47315,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -47331,12 +47331,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -47348,11 +47348,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -47364,11 +47364,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -47379,11 +47379,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -47397,13 +47397,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -47414,11 +47414,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -47432,13 +47432,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -47449,11 +47449,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -47467,13 +47467,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47485,12 +47485,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47503,12 +47503,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47522,13 +47522,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47540,12 +47540,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47558,12 +47558,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47577,13 +47577,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -47595,11 +47595,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -47614,13 +47614,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47633,12 +47633,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47651,12 +47651,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -47671,13 +47671,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -47690,11 +47690,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47709,12 +47709,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -47725,10 +47725,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -47739,10 +47739,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -47753,13 +47753,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -47769,10 +47769,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -47783,10 +47783,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -47799,12 +47799,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -47816,11 +47816,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -47832,11 +47832,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -47847,11 +47847,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -47865,13 +47865,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -47882,11 +47882,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -47900,13 +47900,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -47917,11 +47917,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -47935,13 +47935,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47953,12 +47953,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47971,12 +47971,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -47990,13 +47990,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48008,12 +48008,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48026,12 +48026,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48045,13 +48045,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -48063,11 +48063,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -48082,13 +48082,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48101,12 +48101,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48119,12 +48119,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -48139,13 +48139,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -48158,11 +48158,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48177,12 +48177,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -48193,10 +48193,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -48207,10 +48207,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -48221,13 +48221,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -48237,10 +48237,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -48251,10 +48251,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -48267,12 +48267,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -48284,11 +48284,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -48300,11 +48300,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -48315,11 +48315,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -48333,13 +48333,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -48350,11 +48350,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -48368,13 +48368,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -48385,11 +48385,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -48403,13 +48403,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48421,12 +48421,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48439,12 +48439,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48458,13 +48458,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48476,12 +48476,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48494,12 +48494,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48513,13 +48513,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -48531,11 +48531,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -48550,13 +48550,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48569,12 +48569,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48587,12 +48587,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -48607,13 +48607,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -48626,11 +48626,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48645,12 +48645,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -48661,10 +48661,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -48675,10 +48675,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -48689,13 +48689,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -48705,10 +48705,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -48719,10 +48719,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -48735,12 +48735,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -48752,11 +48752,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -48768,11 +48768,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -48783,11 +48783,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -48801,13 +48801,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -48818,11 +48818,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -48836,13 +48836,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -48853,11 +48853,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -48871,13 +48871,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48889,12 +48889,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48907,12 +48907,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48926,13 +48926,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48944,12 +48944,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48962,12 +48962,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -48981,13 +48981,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -48999,11 +48999,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -49018,13 +49018,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49037,12 +49037,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49055,12 +49055,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -49075,13 +49075,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -49094,11 +49094,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49113,12 +49113,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -49129,10 +49129,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -49143,10 +49143,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -49157,13 +49157,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -49173,10 +49173,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -49187,10 +49187,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -49203,12 +49203,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -49220,11 +49220,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -49236,11 +49236,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -49251,11 +49251,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -49269,13 +49269,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -49286,11 +49286,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -49304,13 +49304,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -49321,11 +49321,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -49339,13 +49339,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49357,12 +49357,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49375,12 +49375,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49394,13 +49394,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49412,12 +49412,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49430,12 +49430,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49449,13 +49449,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -49467,11 +49467,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -49486,13 +49486,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49505,12 +49505,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49523,12 +49523,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -49543,13 +49543,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -49562,11 +49562,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49581,12 +49581,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -49597,10 +49597,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -49611,10 +49611,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE2_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -49625,13 +49625,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -49641,10 +49641,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -49655,10 +49655,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -49671,12 +49671,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -49688,11 +49688,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -49704,11 +49704,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -49719,11 +49719,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -49737,13 +49737,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -49754,11 +49754,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -49772,13 +49772,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -49789,11 +49789,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -49807,13 +49807,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49825,12 +49825,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49843,12 +49843,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49862,13 +49862,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49880,12 +49880,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49898,12 +49898,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49917,13 +49917,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -49935,11 +49935,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -49954,13 +49954,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49973,12 +49973,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -49991,12 +49991,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -50011,13 +50011,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -50030,11 +50030,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50049,12 +50049,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -50065,10 +50065,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -50079,10 +50079,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE2_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -50093,13 +50093,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -50109,10 +50109,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -50123,10 +50123,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -50139,12 +50139,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -50156,11 +50156,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -50172,11 +50172,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -50187,11 +50187,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -50205,13 +50205,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -50222,11 +50222,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -50240,13 +50240,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -50257,11 +50257,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -50275,13 +50275,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50293,12 +50293,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50311,12 +50311,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50330,13 +50330,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50348,12 +50348,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50366,12 +50366,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50385,13 +50385,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -50403,11 +50403,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -50422,13 +50422,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50441,12 +50441,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50459,12 +50459,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -50479,13 +50479,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -50498,11 +50498,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50517,12 +50517,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -50533,10 +50533,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -50547,10 +50547,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE2_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -50561,13 +50561,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -50577,10 +50577,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -50591,10 +50591,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -50607,12 +50607,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -50624,11 +50624,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -50640,11 +50640,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -50655,11 +50655,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -50673,13 +50673,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -50690,11 +50690,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -50708,13 +50708,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -50725,11 +50725,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -50743,13 +50743,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50761,12 +50761,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50779,12 +50779,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50798,13 +50798,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50816,12 +50816,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50834,12 +50834,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50853,13 +50853,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -50871,11 +50871,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -50890,13 +50890,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50909,12 +50909,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50927,12 +50927,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -50947,13 +50947,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -50966,11 +50966,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -50985,12 +50985,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -51001,10 +51001,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -51015,10 +51015,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE2_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(4)
@@ -51029,13 +51029,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -51045,10 +51045,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -51059,10 +51059,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -51075,12 +51075,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -51092,11 +51092,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -51108,11 +51108,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -51123,11 +51123,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51141,13 +51141,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -51158,11 +51158,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51176,13 +51176,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -51193,11 +51193,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51211,13 +51211,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51229,12 +51229,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51247,12 +51247,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51266,13 +51266,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51284,12 +51284,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51302,12 +51302,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51321,13 +51321,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -51339,11 +51339,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51358,13 +51358,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51377,12 +51377,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51395,12 +51395,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -51415,13 +51415,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -51434,11 +51434,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51453,12 +51453,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -51469,10 +51469,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -51483,10 +51483,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSSE3_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -51497,13 +51497,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -51513,10 +51513,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -51527,10 +51527,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -51543,12 +51543,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -51560,11 +51560,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -51576,11 +51576,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -51591,11 +51591,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -51609,13 +51609,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -51626,11 +51626,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -51644,13 +51644,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -51661,11 +51661,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -51679,13 +51679,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51697,12 +51697,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51715,12 +51715,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51734,13 +51734,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51752,12 +51752,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51770,12 +51770,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51789,13 +51789,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -51807,11 +51807,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -51826,13 +51826,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51845,12 +51845,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51863,12 +51863,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -51883,13 +51883,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -51902,11 +51902,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -51921,12 +51921,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -51937,10 +51937,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -51951,10 +51951,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSSE3_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -51965,13 +51965,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -51981,10 +51981,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -51995,10 +51995,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -52011,12 +52011,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -52028,11 +52028,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -52044,11 +52044,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -52059,11 +52059,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52077,13 +52077,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -52094,11 +52094,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52112,13 +52112,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -52129,11 +52129,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52147,13 +52147,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52165,12 +52165,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52183,12 +52183,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52202,13 +52202,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52220,12 +52220,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52238,12 +52238,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52257,13 +52257,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -52275,11 +52275,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52294,13 +52294,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52313,12 +52313,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52331,12 +52331,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -52351,13 +52351,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -52370,11 +52370,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52389,12 +52389,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -52405,10 +52405,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -52419,10 +52419,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSSE3_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -52433,13 +52433,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -52449,10 +52449,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -52463,10 +52463,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -52479,12 +52479,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -52496,11 +52496,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -52512,11 +52512,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -52527,11 +52527,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -52545,13 +52545,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -52562,11 +52562,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -52580,13 +52580,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -52597,11 +52597,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -52615,13 +52615,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52633,12 +52633,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52651,12 +52651,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52670,13 +52670,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52688,12 +52688,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52706,12 +52706,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52725,13 +52725,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -52743,11 +52743,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -52762,13 +52762,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52781,12 +52781,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52799,12 +52799,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -52819,13 +52819,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -52838,11 +52838,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -52857,12 +52857,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -52873,10 +52873,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -52887,10 +52887,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSSE3_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(4)
@@ -52901,13 +52901,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -52917,10 +52917,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -52931,10 +52931,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -52947,12 +52947,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -52964,11 +52964,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -52980,11 +52980,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -52995,11 +52995,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53013,13 +53013,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -53030,11 +53030,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53048,13 +53048,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -53065,11 +53065,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53083,13 +53083,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53101,12 +53101,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53119,12 +53119,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53138,13 +53138,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53156,12 +53156,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53174,12 +53174,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53193,13 +53193,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -53211,11 +53211,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53230,13 +53230,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53249,12 +53249,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53267,12 +53267,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -53287,13 +53287,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -53306,11 +53306,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53325,12 +53325,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -53341,10 +53341,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -53355,10 +53355,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__SSE41_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -53369,13 +53369,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -53385,10 +53385,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -53399,10 +53399,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -53415,12 +53415,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -53432,11 +53432,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -53448,11 +53448,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -53463,11 +53463,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -53481,13 +53481,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -53498,11 +53498,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -53516,13 +53516,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -53533,11 +53533,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -53551,13 +53551,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53569,12 +53569,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53587,12 +53587,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53606,13 +53606,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53624,12 +53624,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53642,12 +53642,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53661,13 +53661,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -53679,11 +53679,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -53698,13 +53698,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53717,12 +53717,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53735,12 +53735,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -53755,13 +53755,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -53774,11 +53774,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -53793,12 +53793,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -53809,10 +53809,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -53823,10 +53823,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__SSE41_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -53837,13 +53837,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -53853,10 +53853,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -53867,10 +53867,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -53883,12 +53883,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -53900,11 +53900,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -53916,11 +53916,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -53931,11 +53931,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -53949,13 +53949,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -53966,11 +53966,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -53984,13 +53984,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54001,11 +54001,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -54019,13 +54019,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54037,12 +54037,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54055,12 +54055,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54074,13 +54074,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54092,12 +54092,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54110,12 +54110,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54129,13 +54129,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -54147,11 +54147,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -54166,13 +54166,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54185,12 +54185,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54203,12 +54203,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -54223,13 +54223,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -54242,11 +54242,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54261,12 +54261,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -54277,10 +54277,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -54291,10 +54291,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__SSE41_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -54305,13 +54305,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -54321,10 +54321,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -54335,10 +54335,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -54351,12 +54351,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -54368,11 +54368,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -54384,11 +54384,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -54399,11 +54399,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -54417,13 +54417,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -54434,11 +54434,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -54452,13 +54452,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54469,11 +54469,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -54487,13 +54487,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54505,12 +54505,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54523,12 +54523,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54542,13 +54542,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54560,12 +54560,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54578,12 +54578,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54597,13 +54597,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -54615,11 +54615,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -54634,13 +54634,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54653,12 +54653,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54671,12 +54671,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -54691,13 +54691,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -54710,11 +54710,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54729,12 +54729,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -54745,10 +54745,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -54759,10 +54759,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__SSE41_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(4)
@@ -54773,13 +54773,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -54789,10 +54789,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -54803,10 +54803,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -54819,12 +54819,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -54836,11 +54836,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -54852,11 +54852,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -54867,11 +54867,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -54885,13 +54885,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -54902,11 +54902,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -54920,13 +54920,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -54937,11 +54937,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -54955,13 +54955,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54973,12 +54973,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -54991,12 +54991,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55010,13 +55010,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55028,12 +55028,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55046,12 +55046,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55065,13 +55065,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -55083,11 +55083,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -55102,13 +55102,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55121,12 +55121,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55139,12 +55139,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -55159,13 +55159,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -55178,11 +55178,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55197,12 +55197,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -55213,10 +55213,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -55227,10 +55227,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__AVX_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -55241,13 +55241,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -55257,10 +55257,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -55271,10 +55271,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -55287,12 +55287,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -55304,11 +55304,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -55320,11 +55320,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -55335,11 +55335,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -55353,13 +55353,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -55370,11 +55370,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -55388,13 +55388,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -55405,11 +55405,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -55423,13 +55423,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55441,12 +55441,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55459,12 +55459,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55478,13 +55478,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55496,12 +55496,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55514,12 +55514,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55533,13 +55533,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -55551,11 +55551,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -55570,13 +55570,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55589,12 +55589,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55607,12 +55607,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -55627,13 +55627,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -55646,11 +55646,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55665,12 +55665,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -55681,10 +55681,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -55695,10 +55695,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__AVX_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -55709,13 +55709,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -55725,10 +55725,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -55739,10 +55739,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -55755,12 +55755,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -55772,11 +55772,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -55788,11 +55788,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -55803,11 +55803,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -55821,13 +55821,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -55838,11 +55838,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -55856,13 +55856,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -55873,11 +55873,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -55891,13 +55891,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55909,12 +55909,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55927,12 +55927,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55946,13 +55946,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55964,12 +55964,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -55982,12 +55982,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56001,13 +56001,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -56019,11 +56019,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -56038,13 +56038,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56057,12 +56057,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56075,12 +56075,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -56095,13 +56095,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -56114,11 +56114,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56133,12 +56133,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -56149,10 +56149,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -56163,10 +56163,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__AVX_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -56177,13 +56177,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -56193,10 +56193,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -56207,10 +56207,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -56223,12 +56223,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -56240,11 +56240,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -56256,11 +56256,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -56271,11 +56271,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56289,13 +56289,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -56306,11 +56306,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56324,13 +56324,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -56341,11 +56341,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56359,13 +56359,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56377,12 +56377,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56395,12 +56395,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56414,13 +56414,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56432,12 +56432,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56450,12 +56450,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56469,13 +56469,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -56487,11 +56487,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56506,13 +56506,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56525,12 +56525,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56543,12 +56543,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -56563,13 +56563,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -56582,11 +56582,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56601,12 +56601,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -56617,10 +56617,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -56631,10 +56631,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__AVX_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(4)
@@ -56645,13 +56645,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -56661,10 +56661,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -56675,10 +56675,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -56691,12 +56691,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -56708,11 +56708,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -56724,11 +56724,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -56739,11 +56739,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -56757,13 +56757,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -56774,11 +56774,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -56792,13 +56792,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -56809,11 +56809,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -56827,13 +56827,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56845,12 +56845,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56863,12 +56863,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56882,13 +56882,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56900,12 +56900,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56918,12 +56918,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56937,13 +56937,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -56955,11 +56955,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -56974,13 +56974,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -56993,12 +56993,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57011,12 +57011,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -57031,13 +57031,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -57050,11 +57050,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57069,12 +57069,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -57085,10 +57085,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -57099,10 +57099,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C2__XOP_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -57113,13 +57113,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -57129,10 +57129,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -57143,10 +57143,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -57159,12 +57159,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -57176,11 +57176,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -57192,11 +57192,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -57207,11 +57207,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57225,13 +57225,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -57242,11 +57242,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57260,13 +57260,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -57277,11 +57277,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57295,13 +57295,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57313,12 +57313,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57331,12 +57331,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57350,13 +57350,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57368,12 +57368,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57386,12 +57386,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57405,13 +57405,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -57423,11 +57423,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57442,13 +57442,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57461,12 +57461,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57479,12 +57479,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -57499,13 +57499,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -57518,11 +57518,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57537,12 +57537,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -57553,10 +57553,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -57567,10 +57567,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C2__XOP_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -57581,13 +57581,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -57597,10 +57597,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -57611,10 +57611,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -57627,12 +57627,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -57644,11 +57644,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -57660,11 +57660,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -57675,11 +57675,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -57693,13 +57693,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -57710,11 +57710,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -57728,13 +57728,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -57745,11 +57745,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -57763,13 +57763,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57781,12 +57781,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57799,12 +57799,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57818,13 +57818,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57836,12 +57836,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57854,12 +57854,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57873,13 +57873,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -57891,11 +57891,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -57910,13 +57910,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57929,12 +57929,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -57947,12 +57947,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -57967,13 +57967,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -57986,11 +57986,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58005,12 +58005,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -58021,10 +58021,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -58035,10 +58035,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C2__XOP_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -58049,13 +58049,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -58065,10 +58065,10 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -58079,10 +58079,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -58095,12 +58095,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -58112,11 +58112,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -58128,11 +58128,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -58143,11 +58143,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58161,13 +58161,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -58178,11 +58178,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58196,13 +58196,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -58213,11 +58213,11 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58231,13 +58231,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58249,12 +58249,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58267,12 +58267,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58286,13 +58286,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58304,12 +58304,12 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58322,12 +58322,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58341,13 +58341,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -58359,11 +58359,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58378,13 +58378,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58397,12 +58397,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58415,12 +58415,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -58435,13 +58435,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -58454,11 +58454,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58473,12 +58473,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -58489,10 +58489,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -58503,10 +58503,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X4C2__XOP_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(4)
@@ -58517,13 +58517,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -58533,10 +58533,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -58547,10 +58547,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -58563,12 +58563,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -58580,11 +58580,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -58596,11 +58596,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -58611,11 +58611,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -58629,13 +58629,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -58646,11 +58646,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -58664,13 +58664,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -58681,11 +58681,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -58699,13 +58699,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58717,12 +58717,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58735,12 +58735,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58754,13 +58754,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58772,12 +58772,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58790,12 +58790,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58809,13 +58809,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -58827,11 +58827,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -58846,13 +58846,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58865,12 +58865,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58883,12 +58883,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -58903,13 +58903,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -58922,11 +58922,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -58941,12 +58941,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -58957,10 +58957,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -58971,10 +58971,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -58985,13 +58985,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -59001,10 +59001,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -59015,10 +59015,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -59031,12 +59031,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -59048,11 +59048,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -59064,11 +59064,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -59079,11 +59079,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59097,13 +59097,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -59114,11 +59114,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59132,13 +59132,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -59149,11 +59149,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59167,13 +59167,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59185,12 +59185,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59203,12 +59203,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59222,13 +59222,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59240,12 +59240,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59258,12 +59258,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59277,13 +59277,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -59295,11 +59295,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59314,13 +59314,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59333,12 +59333,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59351,12 +59351,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -59371,13 +59371,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -59390,11 +59390,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59409,12 +59409,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -59425,10 +59425,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -59439,10 +59439,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -59453,13 +59453,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -59469,10 +59469,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -59483,10 +59483,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -59499,12 +59499,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -59516,11 +59516,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -59532,11 +59532,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -59547,11 +59547,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -59565,13 +59565,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -59582,11 +59582,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -59600,13 +59600,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -59617,11 +59617,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -59635,13 +59635,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59653,12 +59653,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59671,12 +59671,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59690,13 +59690,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59708,12 +59708,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59726,12 +59726,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59745,13 +59745,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -59763,11 +59763,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -59782,13 +59782,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59801,12 +59801,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59819,12 +59819,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -59839,13 +59839,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -59858,11 +59858,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -59877,12 +59877,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -59893,10 +59893,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -59907,10 +59907,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -59921,13 +59921,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -59937,10 +59937,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -59951,10 +59951,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -59967,12 +59967,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -59984,11 +59984,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -60000,11 +60000,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -60015,11 +60015,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -60033,13 +60033,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -60050,11 +60050,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -60068,13 +60068,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -60085,11 +60085,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -60103,13 +60103,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60121,12 +60121,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60139,12 +60139,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60158,13 +60158,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60176,12 +60176,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60194,12 +60194,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60213,13 +60213,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -60231,11 +60231,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -60250,13 +60250,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60269,12 +60269,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60287,12 +60287,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -60307,13 +60307,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -60326,11 +60326,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60345,12 +60345,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -60361,10 +60361,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -60375,10 +60375,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -60389,13 +60389,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -60405,10 +60405,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -60419,10 +60419,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -60435,12 +60435,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -60452,11 +60452,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -60468,11 +60468,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -60483,11 +60483,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -60501,13 +60501,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -60518,11 +60518,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -60536,13 +60536,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -60553,11 +60553,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -60571,13 +60571,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60589,12 +60589,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60607,12 +60607,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60626,13 +60626,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60644,12 +60644,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60662,12 +60662,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60681,13 +60681,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -60699,11 +60699,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -60718,13 +60718,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60737,12 +60737,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60755,12 +60755,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -60775,13 +60775,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -60794,11 +60794,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -60813,12 +60813,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -60829,10 +60829,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -60843,10 +60843,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -60857,13 +60857,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -60873,10 +60873,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -60887,10 +60887,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -60903,12 +60903,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -60920,11 +60920,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -60936,11 +60936,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -60951,11 +60951,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -60969,13 +60969,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -60986,11 +60986,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -61004,13 +61004,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -61021,11 +61021,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -61039,13 +61039,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61057,12 +61057,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61075,12 +61075,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61094,13 +61094,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61112,12 +61112,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61130,12 +61130,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61149,13 +61149,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -61167,11 +61167,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -61186,13 +61186,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61205,12 +61205,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61223,12 +61223,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -61243,13 +61243,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -61262,11 +61262,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61281,12 +61281,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -61297,10 +61297,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -61311,10 +61311,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -61325,13 +61325,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -61341,10 +61341,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -61355,10 +61355,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -61371,12 +61371,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -61388,11 +61388,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -61404,11 +61404,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -61419,11 +61419,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61437,13 +61437,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -61454,11 +61454,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61472,13 +61472,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -61489,11 +61489,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61507,13 +61507,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61525,12 +61525,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61543,12 +61543,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61562,13 +61562,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61580,12 +61580,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61598,12 +61598,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61617,13 +61617,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -61635,11 +61635,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61654,13 +61654,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61673,12 +61673,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61691,12 +61691,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -61711,13 +61711,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -61730,11 +61730,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61749,12 +61749,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -61765,10 +61765,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -61779,10 +61779,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -61793,13 +61793,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -61809,10 +61809,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -61823,10 +61823,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -61839,12 +61839,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -61856,11 +61856,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -61872,11 +61872,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -61887,11 +61887,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -61905,13 +61905,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -61922,11 +61922,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -61940,13 +61940,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -61957,11 +61957,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -61975,13 +61975,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -61993,12 +61993,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62011,12 +62011,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62030,13 +62030,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62048,12 +62048,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62066,12 +62066,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62085,13 +62085,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -62103,11 +62103,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -62122,13 +62122,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62141,12 +62141,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62159,12 +62159,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -62179,13 +62179,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -62198,11 +62198,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62217,12 +62217,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -62233,10 +62233,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -62247,10 +62247,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -62261,13 +62261,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -62277,10 +62277,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -62291,10 +62291,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -62307,12 +62307,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -62324,11 +62324,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -62340,11 +62340,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -62355,11 +62355,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -62373,13 +62373,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -62390,11 +62390,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -62408,13 +62408,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -62425,11 +62425,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -62443,13 +62443,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62461,12 +62461,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62479,12 +62479,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62498,13 +62498,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62516,12 +62516,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62534,12 +62534,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62553,13 +62553,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -62571,11 +62571,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -62590,13 +62590,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62609,12 +62609,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62627,12 +62627,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -62647,13 +62647,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -62666,11 +62666,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62685,12 +62685,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -62701,10 +62701,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -62715,10 +62715,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -62729,13 +62729,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -62745,10 +62745,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -62759,10 +62759,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -62775,12 +62775,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -62792,11 +62792,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -62808,11 +62808,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -62823,11 +62823,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -62841,13 +62841,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -62858,11 +62858,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -62876,13 +62876,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -62893,11 +62893,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -62911,13 +62911,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62929,12 +62929,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62947,12 +62947,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62966,13 +62966,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -62984,12 +62984,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63002,12 +63002,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63021,13 +63021,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -63039,11 +63039,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -63058,13 +63058,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63077,12 +63077,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63095,12 +63095,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -63115,13 +63115,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -63134,11 +63134,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63153,12 +63153,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -63169,10 +63169,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -63183,10 +63183,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -63197,13 +63197,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -63213,10 +63213,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -63227,10 +63227,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -63243,12 +63243,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -63260,11 +63260,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -63276,11 +63276,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -63291,11 +63291,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -63309,13 +63309,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -63326,11 +63326,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -63344,13 +63344,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -63361,11 +63361,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -63379,13 +63379,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63397,12 +63397,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63415,12 +63415,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63434,13 +63434,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63452,12 +63452,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63470,12 +63470,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63489,13 +63489,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -63507,11 +63507,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -63526,13 +63526,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63545,12 +63545,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63563,12 +63563,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -63583,13 +63583,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -63602,11 +63602,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63621,12 +63621,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -63637,10 +63637,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -63651,10 +63651,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -63665,13 +63665,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -63681,10 +63681,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -63695,10 +63695,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -63711,12 +63711,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -63728,11 +63728,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -63744,11 +63744,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -63759,11 +63759,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -63777,13 +63777,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -63794,11 +63794,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -63812,13 +63812,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -63829,11 +63829,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -63847,13 +63847,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63865,12 +63865,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63883,12 +63883,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63902,13 +63902,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63920,12 +63920,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63938,12 +63938,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -63957,13 +63957,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -63975,11 +63975,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -63994,13 +63994,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64013,12 +64013,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64031,12 +64031,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -64051,13 +64051,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -64070,11 +64070,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64089,12 +64089,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -64105,10 +64105,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -64119,10 +64119,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -64133,13 +64133,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -64149,10 +64149,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -64163,10 +64163,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -64179,12 +64179,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -64196,11 +64196,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -64212,11 +64212,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -64227,11 +64227,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64245,13 +64245,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -64262,11 +64262,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64280,13 +64280,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -64297,11 +64297,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64315,13 +64315,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64333,12 +64333,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64351,12 +64351,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64370,13 +64370,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64388,12 +64388,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64406,12 +64406,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64425,13 +64425,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -64443,11 +64443,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64462,13 +64462,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64481,12 +64481,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64499,12 +64499,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -64519,13 +64519,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -64538,11 +64538,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64557,12 +64557,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -64573,10 +64573,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -64587,10 +64587,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -64601,13 +64601,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -64617,10 +64617,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -64631,10 +64631,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -64647,12 +64647,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -64664,11 +64664,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -64680,11 +64680,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -64695,11 +64695,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -64713,13 +64713,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -64730,11 +64730,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -64748,13 +64748,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -64765,11 +64765,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -64783,13 +64783,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64801,12 +64801,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64819,12 +64819,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64838,13 +64838,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64856,12 +64856,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64874,12 +64874,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64893,13 +64893,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -64911,11 +64911,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -64930,13 +64930,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64949,12 +64949,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -64967,12 +64967,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -64987,13 +64987,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -65006,11 +65006,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65025,12 +65025,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -65041,10 +65041,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -65055,10 +65055,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -65069,13 +65069,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -65085,10 +65085,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -65099,10 +65099,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -65115,12 +65115,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -65132,11 +65132,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -65148,11 +65148,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -65163,11 +65163,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -65181,13 +65181,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -65198,11 +65198,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -65216,13 +65216,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -65233,11 +65233,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -65251,13 +65251,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65269,12 +65269,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65287,12 +65287,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65306,13 +65306,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65324,12 +65324,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65342,12 +65342,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65361,13 +65361,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -65379,11 +65379,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -65398,13 +65398,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65417,12 +65417,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65435,12 +65435,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -65455,13 +65455,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -65474,11 +65474,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65493,12 +65493,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -65509,10 +65509,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -65523,10 +65523,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -65537,13 +65537,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65553,10 +65553,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65567,10 +65567,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -65583,12 +65583,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -65600,11 +65600,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -65616,11 +65616,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -65631,11 +65631,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65649,13 +65649,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -65666,11 +65666,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65684,13 +65684,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -65701,11 +65701,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65719,13 +65719,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65737,12 +65737,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65755,12 +65755,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65774,13 +65774,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65792,12 +65792,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65810,12 +65810,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65829,13 +65829,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -65847,11 +65847,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65866,13 +65866,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65885,12 +65885,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65903,12 +65903,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -65923,13 +65923,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -65942,11 +65942,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -65961,12 +65961,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65977,10 +65977,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -65991,10 +65991,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE2_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(1)
@@ -66005,13 +66005,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -66021,10 +66021,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -66035,10 +66035,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -66051,12 +66051,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -66068,11 +66068,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -66084,11 +66084,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -66099,11 +66099,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -66117,13 +66117,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -66134,11 +66134,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -66152,13 +66152,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -66169,11 +66169,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -66187,13 +66187,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66205,12 +66205,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66223,12 +66223,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66242,13 +66242,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66260,12 +66260,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66278,12 +66278,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66297,13 +66297,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -66315,11 +66315,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -66334,13 +66334,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66353,12 +66353,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66371,12 +66371,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -66391,13 +66391,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -66410,11 +66410,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66429,12 +66429,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -66445,10 +66445,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -66459,10 +66459,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE2_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(2)
@@ -66473,13 +66473,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66489,10 +66489,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66503,10 +66503,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -66519,12 +66519,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -66536,11 +66536,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -66552,11 +66552,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -66567,11 +66567,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66585,13 +66585,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -66602,11 +66602,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66620,13 +66620,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -66637,11 +66637,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66655,13 +66655,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66673,12 +66673,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66691,12 +66691,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66710,13 +66710,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66728,12 +66728,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66746,12 +66746,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66765,13 +66765,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -66783,11 +66783,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66802,13 +66802,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66821,12 +66821,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66839,12 +66839,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -66859,13 +66859,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -66878,11 +66878,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, zero) {
     TEST_REQUIRES_X86_SSE2;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -66897,12 +66897,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, qmin) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66913,10 +66913,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, qmax) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66927,10 +66927,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE2_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE2;
     GemmMicrokernelTester()
       .mr(3)
@@ -66941,13 +66941,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -66957,10 +66957,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -66971,10 +66971,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -66987,12 +66987,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -67004,11 +67004,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -67020,11 +67020,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -67035,11 +67035,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -67053,13 +67053,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -67070,11 +67070,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -67088,13 +67088,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -67105,11 +67105,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -67123,13 +67123,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67141,12 +67141,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67159,12 +67159,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67178,13 +67178,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67196,12 +67196,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67214,12 +67214,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67233,13 +67233,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -67251,11 +67251,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -67270,13 +67270,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67289,12 +67289,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67307,12 +67307,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -67327,13 +67327,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -67346,11 +67346,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67365,12 +67365,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -67381,10 +67381,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -67395,10 +67395,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSSE3_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(1)
@@ -67409,13 +67409,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67425,10 +67425,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67439,10 +67439,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -67455,12 +67455,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -67472,11 +67472,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -67488,11 +67488,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -67503,11 +67503,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67521,13 +67521,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -67538,11 +67538,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67556,13 +67556,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -67573,11 +67573,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67591,13 +67591,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67609,12 +67609,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67627,12 +67627,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67646,13 +67646,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67664,12 +67664,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67682,12 +67682,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67701,13 +67701,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -67719,11 +67719,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67738,13 +67738,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67757,12 +67757,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67775,12 +67775,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -67795,13 +67795,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -67814,11 +67814,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -67833,12 +67833,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67849,10 +67849,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67863,10 +67863,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSSE3_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(2)
@@ -67877,13 +67877,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -67893,10 +67893,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -67907,10 +67907,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -67923,12 +67923,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -67940,11 +67940,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -67956,11 +67956,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -67971,11 +67971,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -67989,13 +67989,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -68006,11 +68006,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -68024,13 +68024,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -68041,11 +68041,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -68059,13 +68059,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68077,12 +68077,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68095,12 +68095,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68114,13 +68114,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68132,12 +68132,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68150,12 +68150,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68169,13 +68169,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -68187,11 +68187,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -68206,13 +68206,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68225,12 +68225,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68243,12 +68243,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -68263,13 +68263,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, a_offset) {
     TEST_REQUIRES_X86_SSSE3;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -68282,11 +68282,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, zero) {
     TEST_REQUIRES_X86_SSSE3;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68301,12 +68301,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, qmin) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -68317,10 +68317,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, qmax) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -68331,10 +68331,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSSE3_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSSE3;
     GemmMicrokernelTester()
       .mr(3)
@@ -68345,13 +68345,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_gemm_sse2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -68361,10 +68361,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -68375,10 +68375,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -68391,12 +68391,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -68408,11 +68408,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -68424,11 +68424,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -68439,11 +68439,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68457,13 +68457,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -68474,11 +68474,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68492,13 +68492,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -68509,11 +68509,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68527,13 +68527,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68545,12 +68545,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68563,12 +68563,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68582,13 +68582,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68600,12 +68600,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68618,12 +68618,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68637,13 +68637,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -68655,11 +68655,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68674,13 +68674,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68693,12 +68693,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68711,12 +68711,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -68731,13 +68731,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -68750,11 +68750,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -68769,12 +68769,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -68785,10 +68785,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -68799,10 +68799,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__SSE41_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(1)
@@ -68813,13 +68813,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -68829,10 +68829,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -68843,10 +68843,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -68859,12 +68859,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -68876,11 +68876,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -68892,11 +68892,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -68907,11 +68907,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -68925,13 +68925,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -68942,11 +68942,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -68960,13 +68960,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -68977,11 +68977,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -68995,13 +68995,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69013,12 +69013,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69031,12 +69031,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69050,13 +69050,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69068,12 +69068,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69086,12 +69086,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69105,13 +69105,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -69123,11 +69123,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -69142,13 +69142,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69161,12 +69161,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69179,12 +69179,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -69199,13 +69199,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -69218,11 +69218,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69237,12 +69237,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -69253,10 +69253,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -69267,10 +69267,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__SSE41_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(2)
@@ -69281,13 +69281,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -69297,10 +69297,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -69311,10 +69311,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -69327,12 +69327,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -69344,11 +69344,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -69360,11 +69360,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_lt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -69375,11 +69375,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -69393,13 +69393,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_gt_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -69410,11 +69410,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -69428,13 +69428,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_div_8) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -69445,11 +69445,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -69463,13 +69463,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_gt_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69481,12 +69481,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69499,12 +69499,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69518,13 +69518,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_div_4) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69536,12 +69536,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69554,12 +69554,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69573,13 +69573,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -69591,11 +69591,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -69610,13 +69610,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69629,12 +69629,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69647,12 +69647,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -69667,13 +69667,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, a_offset) {
     TEST_REQUIRES_X86_SSE41;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -69686,11 +69686,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, zero) {
     TEST_REQUIRES_X86_SSE41;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69705,12 +69705,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, qmin) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -69721,10 +69721,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, qmax) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -69735,10 +69735,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__SSE41_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cm) {
     TEST_REQUIRES_X86_SSE41;
     GemmMicrokernelTester()
       .mr(3)
@@ -69749,13 +69749,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -69765,10 +69765,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -69779,10 +69779,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -69795,12 +69795,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -69812,11 +69812,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -69828,11 +69828,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -69843,11 +69843,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -69861,13 +69861,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -69878,11 +69878,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -69896,13 +69896,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -69913,11 +69913,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -69931,13 +69931,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69949,12 +69949,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69967,12 +69967,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -69986,13 +69986,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70004,12 +70004,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70022,12 +70022,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70041,13 +70041,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -70059,11 +70059,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -70078,13 +70078,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70097,12 +70097,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70115,12 +70115,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -70135,13 +70135,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -70154,11 +70154,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70173,12 +70173,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -70189,10 +70189,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -70203,10 +70203,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__AVX_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(1)
@@ -70217,13 +70217,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -70233,10 +70233,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -70247,10 +70247,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -70263,12 +70263,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -70280,11 +70280,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -70296,11 +70296,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -70311,11 +70311,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -70329,13 +70329,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -70346,11 +70346,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -70364,13 +70364,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -70381,11 +70381,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -70399,13 +70399,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70417,12 +70417,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70435,12 +70435,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70454,13 +70454,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70472,12 +70472,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70490,12 +70490,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70509,13 +70509,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -70527,11 +70527,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -70546,13 +70546,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70565,12 +70565,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70583,12 +70583,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -70603,13 +70603,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -70622,11 +70622,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70641,12 +70641,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -70657,10 +70657,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -70671,10 +70671,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__AVX_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(2)
@@ -70685,13 +70685,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -70701,10 +70701,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cn) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -70715,10 +70715,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -70731,12 +70731,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -70748,11 +70748,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -70764,11 +70764,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_lt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -70779,11 +70779,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -70797,13 +70797,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_gt_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -70814,11 +70814,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -70832,13 +70832,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_div_8) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -70849,11 +70849,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -70867,13 +70867,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_gt_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70885,12 +70885,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70903,12 +70903,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70922,13 +70922,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_div_4) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70940,12 +70940,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70958,12 +70958,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -70977,13 +70977,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -70995,11 +70995,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -71014,13 +71014,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71033,12 +71033,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71051,12 +71051,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -71071,13 +71071,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, a_offset) {
     TEST_REQUIRES_X86_AVX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -71090,11 +71090,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, zero) {
     TEST_REQUIRES_X86_AVX;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71109,12 +71109,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, qmin) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -71125,10 +71125,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, qmax) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -71139,10 +71139,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__AVX_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cm) {
     TEST_REQUIRES_X86_AVX;
     GemmMicrokernelTester()
       .mr(3)
@@ -71153,13 +71153,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -71169,10 +71169,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -71183,10 +71183,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -71199,12 +71199,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -71216,11 +71216,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -71232,11 +71232,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -71247,11 +71247,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -71265,13 +71265,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -71282,11 +71282,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -71300,13 +71300,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -71317,11 +71317,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -71335,13 +71335,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71353,12 +71353,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71371,12 +71371,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71390,13 +71390,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71408,12 +71408,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71426,12 +71426,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71445,13 +71445,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -71463,11 +71463,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -71482,13 +71482,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71501,12 +71501,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71519,12 +71519,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -71539,13 +71539,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -71558,11 +71558,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71577,12 +71577,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -71593,10 +71593,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -71607,10 +71607,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__XOP_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(1)
@@ -71621,13 +71621,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -71637,10 +71637,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -71651,10 +71651,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -71667,12 +71667,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -71684,11 +71684,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -71700,11 +71700,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -71715,11 +71715,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -71733,13 +71733,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -71750,11 +71750,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -71768,13 +71768,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -71785,11 +71785,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -71803,13 +71803,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71821,12 +71821,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71839,12 +71839,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71858,13 +71858,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71876,12 +71876,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71894,12 +71894,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71913,13 +71913,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -71931,11 +71931,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -71950,13 +71950,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71969,12 +71969,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -71987,12 +71987,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -72007,13 +72007,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -72026,11 +72026,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72045,12 +72045,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -72061,10 +72061,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -72075,10 +72075,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__XOP_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(2)
@@ -72089,13 +72089,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -72105,10 +72105,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cn) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -72119,10 +72119,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -72135,12 +72135,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -72152,11 +72152,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -72168,11 +72168,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_lt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -72183,11 +72183,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_lt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -72201,13 +72201,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_gt_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -72218,11 +72218,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_gt_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -72236,13 +72236,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_div_8) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -72253,11 +72253,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_div_8_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -72271,13 +72271,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_gt_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72289,12 +72289,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_gt_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72307,12 +72307,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_gt_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72326,13 +72326,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_div_4) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72344,12 +72344,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_div_4_strided_cn) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72362,12 +72362,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_div_4_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72381,13 +72381,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -72399,11 +72399,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, small_kernel_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -72418,13 +72418,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_gt_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72437,12 +72437,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, n_div_4_small_kernel) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72455,12 +72455,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cm_subtile) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -72475,13 +72475,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, a_offset) {
     TEST_REQUIRES_X86_XOP;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -72494,11 +72494,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, zero) {
     TEST_REQUIRES_X86_XOP;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72513,12 +72513,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, qmin) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -72529,10 +72529,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, qmax) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -72543,10 +72543,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__XOP_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cm) {
     TEST_REQUIRES_X86_XOP;
     GemmMicrokernelTester()
       .mr(3)
@@ -72557,13 +72557,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72573,10 +72573,10 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72587,10 +72587,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -72603,12 +72603,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -72620,11 +72620,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -72636,11 +72636,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -72651,11 +72651,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72669,13 +72669,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -72686,11 +72686,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72704,13 +72704,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -72721,11 +72721,11 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72739,13 +72739,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72757,12 +72757,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72775,12 +72775,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72794,13 +72794,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72812,12 +72812,12 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72830,12 +72830,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72849,13 +72849,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -72867,11 +72867,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72886,13 +72886,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_gt_8_small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72905,12 +72905,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, n_div_8_small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72923,12 +72923,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -72943,13 +72943,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, a_offset) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -72962,11 +72962,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, zero) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -72981,12 +72981,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, qmin) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -72997,10 +72997,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, qmax) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -73011,10 +73011,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X8C8__AVX2, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(1)
@@ -73025,13 +73025,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -73041,10 +73041,10 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -73055,10 +73055,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -73071,12 +73071,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -73088,11 +73088,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -73104,11 +73104,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -73119,11 +73119,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -73137,13 +73137,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -73154,11 +73154,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -73172,13 +73172,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -73189,11 +73189,11 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -73207,13 +73207,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73225,12 +73225,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73243,12 +73243,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73262,13 +73262,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73280,12 +73280,12 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73298,12 +73298,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73317,13 +73317,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -73335,11 +73335,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -73354,13 +73354,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_gt_8_small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73373,12 +73373,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, n_div_8_small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73391,12 +73391,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -73411,13 +73411,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, a_offset) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -73430,11 +73430,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, zero) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73449,12 +73449,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, qmin) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -73465,10 +73465,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, qmax) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -73479,10 +73479,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X8C8__AVX2, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(2)
@@ -73493,13 +73493,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -73509,10 +73509,10 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -73523,10 +73523,10 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 8; n++) {
@@ -73539,12 +73539,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -73556,11 +73556,11 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 1; n <= 8; n++) {
       GemmMicrokernelTester()
@@ -73572,11 +73572,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_lt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -73587,11 +73587,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73605,13 +73605,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -73622,11 +73622,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73640,13 +73640,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -73657,11 +73657,11 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73675,13 +73675,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, n_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73693,12 +73693,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, n_gt_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73711,12 +73711,12 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, n_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73730,13 +73730,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, n_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73748,12 +73748,12 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, n_div_8_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_strided_cn) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73766,12 +73766,12 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, n_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73785,13 +73785,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -73803,11 +73803,11 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73822,13 +73822,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, n_gt_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_gt_8_small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 9; n < 16; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73841,12 +73841,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, n_div_8_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, n_div_8_small_kernel) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t n = 16; n <= 24; n += 8) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73859,12 +73859,12 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -73879,13 +73879,13 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, a_offset) {
     TEST_REQUIRES_X86_AVX2;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -73898,11 +73898,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, zero) {
     TEST_REQUIRES_X86_AVX2;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -73917,12 +73917,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, qmin) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -73933,10 +73933,10 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, qmax) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -73947,10 +73947,10 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X8C8__AVX2, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cm) {
     TEST_REQUIRES_X86_AVX2;
     GemmMicrokernelTester()
       .mr(3)
@@ -73961,13 +73961,13 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_gemm_avx2_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -73977,10 +73977,10 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -73991,10 +73991,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -74007,12 +74007,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
@@ -74024,11 +74024,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -74040,11 +74040,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_lt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -74055,11 +74055,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -74073,13 +74073,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_gt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -74090,11 +74090,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -74108,13 +74108,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_div_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -74125,11 +74125,11 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -74143,13 +74143,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_gt_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74161,12 +74161,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_gt_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74179,12 +74179,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_gt_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74198,13 +74198,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_div_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74216,12 +74216,12 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_div_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74234,12 +74234,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_div_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74253,13 +74253,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -74271,11 +74271,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -74290,13 +74290,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_gt_16_small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74309,12 +74309,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, n_div_16_small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74327,12 +74327,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -74347,13 +74347,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, a_offset) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -74366,11 +74366,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, zero) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74385,12 +74385,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, qmin) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -74401,10 +74401,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, qmax) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -74415,10 +74415,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X16C8__AVX512SKX, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cm) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(1)
@@ -74429,13 +74429,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -74445,10 +74445,10 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -74459,10 +74459,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -74475,12 +74475,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
@@ -74492,11 +74492,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -74508,11 +74508,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_lt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -74523,11 +74523,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74541,13 +74541,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_gt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -74558,11 +74558,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74576,13 +74576,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_div_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -74593,11 +74593,11 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74611,13 +74611,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_gt_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74629,12 +74629,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_gt_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74647,12 +74647,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_gt_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74666,13 +74666,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_div_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74684,12 +74684,12 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_div_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74702,12 +74702,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_div_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74721,13 +74721,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -74739,11 +74739,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74758,13 +74758,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_gt_16_small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74777,12 +74777,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, n_div_16_small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74795,12 +74795,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -74815,13 +74815,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, a_offset) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -74834,11 +74834,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, zero) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -74853,12 +74853,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, qmin) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -74869,10 +74869,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, qmax) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -74883,10 +74883,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X16C8__AVX512SKX, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cm) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(2)
@@ -74897,13 +74897,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -74913,10 +74913,10 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -74927,10 +74927,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -74943,12 +74943,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
@@ -74960,11 +74960,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -74976,11 +74976,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_lt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -74991,11 +74991,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -75009,13 +75009,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_gt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -75026,11 +75026,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -75044,13 +75044,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_div_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -75061,11 +75061,11 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -75079,13 +75079,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_gt_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75097,12 +75097,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_gt_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75115,12 +75115,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_gt_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75134,13 +75134,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_div_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75152,12 +75152,12 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_div_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75170,12 +75170,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_div_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75189,13 +75189,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -75207,11 +75207,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -75226,13 +75226,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_gt_16_small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75245,12 +75245,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, n_div_16_small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75263,12 +75263,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -75283,13 +75283,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, a_offset) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -75302,11 +75302,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, zero) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75321,12 +75321,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, qmin) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -75337,10 +75337,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, qmax) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -75351,10 +75351,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X16C8__AVX512SKX, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cm) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(3)
@@ -75365,13 +75365,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -75381,10 +75381,10 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -75395,10 +75395,10 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 16; n++) {
@@ -75411,12 +75411,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_subtile_m) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t m = 1; m <= 4; m++) {
       GemmMicrokernelTester()
@@ -75428,11 +75428,11 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_subtile_n) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 1; n <= 16; n++) {
       GemmMicrokernelTester()
@@ -75444,11 +75444,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_lt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
@@ -75459,11 +75459,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_lt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -75477,13 +75477,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_gt_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
@@ -75494,11 +75494,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_gt_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -75512,13 +75512,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_div_8) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
@@ -75529,11 +75529,11 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_div_8_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -75547,13 +75547,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, n_gt_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_gt_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75565,12 +75565,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_gt_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75583,12 +75583,12 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, n_gt_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_gt_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75602,13 +75602,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, n_div_16) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_div_16) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75620,12 +75620,12 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, n_div_16_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_div_16_strided_cn) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75638,12 +75638,12 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, n_div_16_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_div_16_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75657,13 +75657,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -75675,11 +75675,11 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, small_kernel_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -75694,13 +75694,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, n_gt_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_gt_16_small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 17; n < 32; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75713,12 +75713,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, n_div_16_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, n_div_16_small_kernel) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t n = 32; n <= 48; n += 16) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75731,12 +75731,12 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cm_subtile) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -75751,13 +75751,13 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, a_offset) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
@@ -75770,11 +75770,11 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, zero) {
     TEST_REQUIRES_X86_AVX512SKX;
     for (uint32_t mz = 0; mz < 4; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
@@ -75789,12 +75789,12 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, qmin) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -75805,10 +75805,10 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, qmax) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -75819,10 +75819,10 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_4X16C8__AVX512SKX, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cm) {
     TEST_REQUIRES_X86_AVX512SKX;
     GemmMicrokernelTester()
       .mr(4)
@@ -75833,13 +75833,13 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_gemm_sse4_params);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -75848,10 +75848,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cn) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -75861,10 +75861,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -75876,12 +75876,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -75892,11 +75892,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -75907,11 +75907,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -75921,11 +75921,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -75938,13 +75938,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -75954,11 +75954,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -75971,13 +75971,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(1)
@@ -75987,11 +75987,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76004,13 +76004,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76021,12 +76021,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76038,12 +76038,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -76056,13 +76056,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76073,12 +76073,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76090,12 +76090,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -76108,13 +76108,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, small_kernel) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(1)
@@ -76125,11 +76125,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, small_kernel_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76143,13 +76143,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76161,12 +76161,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76178,12 +76178,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76197,13 +76197,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, a_offset) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(1)
@@ -76215,11 +76215,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, zero) {
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76233,12 +76233,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, qmin) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -76248,10 +76248,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, qmax) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -76261,10 +76261,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cm) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -76274,13 +76274,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -76289,10 +76289,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cn) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -76302,10 +76302,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -76317,12 +76317,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -76333,11 +76333,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -76348,11 +76348,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -76362,11 +76362,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76379,13 +76379,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -76395,11 +76395,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76412,13 +76412,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(2)
@@ -76428,11 +76428,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76445,13 +76445,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76462,12 +76462,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76479,12 +76479,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -76497,13 +76497,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76514,12 +76514,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76531,12 +76531,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -76549,13 +76549,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, small_kernel) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(2)
@@ -76566,11 +76566,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, small_kernel_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76584,13 +76584,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76602,12 +76602,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76619,12 +76619,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76638,13 +76638,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, a_offset) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(2)
@@ -76656,11 +76656,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, zero) {
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76674,12 +76674,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, qmin) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -76689,10 +76689,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, qmax) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -76702,10 +76702,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cm) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -76715,13 +76715,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -76730,10 +76730,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cn) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -76743,10 +76743,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -76758,12 +76758,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -76774,11 +76774,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -76789,11 +76789,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -76803,11 +76803,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76820,13 +76820,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -76836,11 +76836,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76853,13 +76853,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(3)
@@ -76869,11 +76869,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -76886,13 +76886,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76903,12 +76903,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76920,12 +76920,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -76938,13 +76938,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76955,12 +76955,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -76972,12 +76972,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -76990,13 +76990,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, small_kernel) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(3)
@@ -77007,11 +77007,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, small_kernel_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77025,13 +77025,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_gt_4_small_kernel) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77043,12 +77043,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, n_div_4_small_kernel) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77060,12 +77060,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77079,13 +77079,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, a_offset) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(3)
@@ -77097,11 +77097,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, zero) {
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77115,12 +77115,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, qmin) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -77130,10 +77130,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, qmax) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -77143,10 +77143,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD64, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cm) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -77156,13 +77156,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -77171,10 +77171,10 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cn) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -77184,10 +77184,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -77199,12 +77199,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 1; m++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -77215,11 +77215,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -77230,11 +77230,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -77244,11 +77244,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77261,13 +77261,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(1)
@@ -77277,11 +77277,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77294,13 +77294,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(1)
@@ -77310,11 +77310,11 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77327,13 +77327,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77344,12 +77344,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77361,12 +77361,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -77379,13 +77379,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77396,12 +77396,12 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77413,12 +77413,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 1; m++) {
@@ -77431,13 +77431,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, small_kernel) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(1)
@@ -77448,11 +77448,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, small_kernel_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77466,13 +77466,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77484,12 +77484,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77501,12 +77501,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 1; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77520,13 +77520,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, a_offset) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(1)
@@ -77538,11 +77538,11 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, zero) {
     for (uint32_t mz = 0; mz < 1; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77556,12 +77556,12 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, qmin) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -77571,10 +77571,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, qmax) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -77584,10 +77584,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_1X4C8__WASMSIMD_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cm) {
     GemmMicrokernelTester()
       .mr(1)
       .nr(4)
@@ -77597,13 +77597,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -77612,10 +77612,10 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cn) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -77625,10 +77625,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -77640,12 +77640,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 2; m++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -77656,11 +77656,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -77671,11 +77671,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -77685,11 +77685,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77702,13 +77702,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(2)
@@ -77718,11 +77718,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77735,13 +77735,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(2)
@@ -77751,11 +77751,11 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77768,13 +77768,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77785,12 +77785,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77802,12 +77802,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -77820,13 +77820,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77837,12 +77837,12 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77854,12 +77854,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 2; m++) {
@@ -77872,13 +77872,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, small_kernel) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(2)
@@ -77889,11 +77889,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, small_kernel_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77907,13 +77907,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77925,12 +77925,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77942,12 +77942,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 2; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -77961,13 +77961,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, a_offset) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(2)
@@ -77979,11 +77979,11 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, zero) {
     for (uint32_t mz = 0; mz < 2; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -77997,12 +77997,12 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, qmin) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -78012,10 +78012,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, qmax) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -78025,10 +78025,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_2X4C8__WASMSIMD_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cm) {
     GemmMicrokernelTester()
       .mr(2)
       .nr(4)
@@ -78038,13 +78038,13 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
 #if XNN_ARCH_WASMSIMD
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -78053,10 +78053,10 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cn) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -78066,10 +78066,10 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
         GemmMicrokernelTester()
@@ -78081,12 +78081,12 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_m) {
     for (uint32_t m = 1; m <= 3; m++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -78097,11 +78097,11 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_subtile_n) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -78112,11 +78112,11 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_lt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_lt_8) {
     for (size_t k = 1; k < 8; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -78126,11 +78126,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_lt_8_subtile) {
     for (size_t k = 1; k < 8; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -78143,13 +78143,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_gt_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_gt_8) {
     for (size_t k = 9; k < 16; k++) {
       GemmMicrokernelTester()
         .mr(3)
@@ -78159,11 +78159,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_gt_8_subtile) {
     for (size_t k = 9; k < 16; k++) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -78176,13 +78176,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_div_8) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_div_8) {
     for (size_t k = 16; k <= 80; k += 8) {
       GemmMicrokernelTester()
         .mr(3)
@@ -78192,11 +78192,11 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, k_div_8_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_div_8_subtile) {
     for (size_t k = 16; k <= 80; k += 8) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -78209,13 +78209,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_gt_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_gt_4) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -78226,12 +78226,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_gt_4_strided_cn) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -78243,12 +78243,12 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_gt_4_subtile) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -78261,13 +78261,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_div_4) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_div_4) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -78278,12 +78278,12 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_div_4_strided_cn) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -78295,12 +78295,12 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_div_4_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_div_4_subtile) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         for (uint32_t m = 1; m <= 3; m++) {
@@ -78313,13 +78313,13 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, small_kernel) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(3)
@@ -78330,11 +78330,11 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, small_kernel_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, small_kernel_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -78348,13 +78348,13 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_gt_4_small_kernel) {
     for (uint32_t n = 5; n < 8; n++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -78366,12 +78366,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, n_div_4_small_kernel) {
     for (uint32_t n = 8; n <= 12; n += 4) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -78383,12 +78383,12 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, strided_cm_subtile) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cm_subtile) {
     for (size_t k = 1; k <= 40; k += 9) {
       for (uint32_t m = 1; m <= 3; m++) {
         for (uint32_t n = 1; n <= 4; n++) {
@@ -78402,13 +78402,13 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
         }
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, a_offset) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, a_offset) {
     for (size_t k = 1; k <= 40; k += 9) {
       GemmMicrokernelTester()
         .mr(3)
@@ -78420,11 +78420,11 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, zero) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, zero) {
     for (uint32_t mz = 0; mz < 3; mz++) {
       for (size_t k = 1; k <= 40; k += 9) {
         GemmMicrokernelTester()
@@ -78438,12 +78438,12 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
       }
     }
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, qmin) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, qmin) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -78453,10 +78453,10 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, qmax) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, qmax) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -78466,10 +78466,10 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 
-  TEST(QS8_IGEMM_MINMAX_3X4C8__WASMSIMD_LD128, strided_cm) {
+  TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cm) {
     GemmMicrokernelTester()
       .mr(3)
       .nr(4)
@@ -78479,12 +78479,12 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_gemm_wasmsimd_params);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, k_eq_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -78493,10 +78493,10 @@
     .m(1)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -78506,10 +78506,10 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, k_eq_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 1; m++) {
     for (uint32_t n = 1; n <= 2; n++) {
       GemmMicrokernelTester()
@@ -78521,12 +78521,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 1; m++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -78537,11 +78537,11 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 2; n++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -78552,11 +78552,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, k_gt_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -78566,11 +78566,11 @@
       .m(1)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, k_gt_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -78583,13 +78583,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, n_gt_2) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_gt_2) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78600,12 +78600,12 @@
         .m(1)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, n_gt_2_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_gt_2_strided_cn) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78617,12 +78617,12 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, n_gt_2_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_gt_2_subtile) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -78635,13 +78635,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, n_div_2) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_div_2) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78652,12 +78652,12 @@
         .m(1)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, n_div_2_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_div_2_strided_cn) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78669,12 +78669,12 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, n_div_2_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_div_2_subtile) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -78687,13 +78687,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, small_kernel) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(1)
@@ -78704,11 +78704,11 @@
       .n(2)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, small_kernel_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, small_kernel_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -78722,13 +78722,13 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, n_gt_2_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_gt_2_small_kernel) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78740,12 +78740,12 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, n_div_2_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, n_div_2_small_kernel) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78757,12 +78757,12 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, strided_cm_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -78776,13 +78776,13 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, a_offset) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, a_offset) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(1)
@@ -78794,11 +78794,11 @@
       .k(k)
       .ks(3)
       .a_offset(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, zero) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, zero) {
   for (uint32_t mz = 0; mz < 1; mz++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78812,12 +78812,12 @@
         .ks(3)
         .a_offset(7)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, qmin) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -78827,10 +78827,10 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, qmax) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -78840,10 +78840,10 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_1X2__SCALAR, strided_cm) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(2)
@@ -78853,11 +78853,11 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, k_eq_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -78866,10 +78866,10 @@
     .m(2)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -78879,10 +78879,10 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, k_eq_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 2; m++) {
     for (uint32_t n = 1; n <= 2; n++) {
       GemmMicrokernelTester()
@@ -78894,12 +78894,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 2; m++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -78910,11 +78910,11 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 2; n++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -78925,11 +78925,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, k_gt_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -78939,11 +78939,11 @@
       .m(2)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, k_gt_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -78956,13 +78956,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, n_gt_2) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_gt_2) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78973,12 +78973,12 @@
         .m(2)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, n_gt_2_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_gt_2_strided_cn) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -78990,12 +78990,12 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, n_gt_2_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_gt_2_subtile) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -79008,13 +79008,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, n_div_2) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_div_2) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79025,12 +79025,12 @@
         .m(2)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, n_div_2_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_div_2_strided_cn) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79042,12 +79042,12 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, n_div_2_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_div_2_subtile) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -79060,13 +79060,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, small_kernel) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(2)
@@ -79077,11 +79077,11 @@
       .n(2)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, small_kernel_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, small_kernel_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -79095,13 +79095,13 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, n_gt_2_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_gt_2_small_kernel) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79113,12 +79113,12 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, n_div_2_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, n_div_2_small_kernel) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79130,12 +79130,12 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, strided_cm_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -79149,13 +79149,13 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, a_offset) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, a_offset) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(2)
@@ -79167,11 +79167,11 @@
       .k(k)
       .ks(3)
       .a_offset(13)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, zero) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, zero) {
   for (uint32_t mz = 0; mz < 2; mz++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79185,12 +79185,12 @@
         .ks(3)
         .a_offset(13)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, qmin) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -79200,10 +79200,10 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, qmax) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -79213,10 +79213,10 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_2X2__SCALAR, strided_cm) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(2)
@@ -79226,11 +79226,11 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, k_eq_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -79239,10 +79239,10 @@
     .m(3)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -79252,10 +79252,10 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, k_eq_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 3; m++) {
     for (uint32_t n = 1; n <= 2; n++) {
       GemmMicrokernelTester()
@@ -79267,12 +79267,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 3; m++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -79283,11 +79283,11 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 2; n++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -79298,11 +79298,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, k_gt_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -79312,11 +79312,11 @@
       .m(3)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, k_gt_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -79329,13 +79329,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, n_gt_2) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_gt_2) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79346,12 +79346,12 @@
         .m(3)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, n_gt_2_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_gt_2_strided_cn) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79363,12 +79363,12 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, n_gt_2_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_gt_2_subtile) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -79381,13 +79381,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, n_div_2) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_div_2) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79398,12 +79398,12 @@
         .m(3)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, n_div_2_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_div_2_strided_cn) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79415,12 +79415,12 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, n_div_2_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_div_2_subtile) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -79433,13 +79433,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, small_kernel) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(3)
@@ -79450,11 +79450,11 @@
       .n(2)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, small_kernel_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, small_kernel_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -79468,13 +79468,13 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, n_gt_2_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_gt_2_small_kernel) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79486,12 +79486,12 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, n_div_2_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, n_div_2_small_kernel) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79503,12 +79503,12 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, strided_cm_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -79522,13 +79522,13 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, a_offset) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, a_offset) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(3)
@@ -79540,11 +79540,11 @@
       .k(k)
       .ks(3)
       .a_offset(17)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, zero) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, zero) {
   for (uint32_t mz = 0; mz < 3; mz++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79558,12 +79558,12 @@
         .ks(3)
         .a_offset(17)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, qmin) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -79573,10 +79573,10 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, qmax) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -79586,10 +79586,10 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_3X2__SCALAR, strided_cm) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(2)
@@ -79599,11 +79599,11 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, k_eq_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -79612,10 +79612,10 @@
     .m(4)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -79625,10 +79625,10 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 4; m++) {
     for (uint32_t n = 1; n <= 2; n++) {
       GemmMicrokernelTester()
@@ -79640,12 +79640,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 4; m++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -79656,11 +79656,11 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 2; n++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -79671,11 +79671,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, k_gt_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -79685,11 +79685,11 @@
       .m(4)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, k_gt_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -79702,13 +79702,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, n_gt_2) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_gt_2) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79719,12 +79719,12 @@
         .m(4)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_gt_2_strided_cn) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79736,12 +79736,12 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_gt_2_subtile) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -79754,13 +79754,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, n_div_2) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_div_2) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79771,12 +79771,12 @@
         .m(4)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, n_div_2_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_div_2_strided_cn) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79788,12 +79788,12 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, n_div_2_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_div_2_subtile) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -79806,13 +79806,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, small_kernel) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(4)
@@ -79823,11 +79823,11 @@
       .n(2)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, small_kernel_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, small_kernel_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -79841,13 +79841,13 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, n_gt_2_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_gt_2_small_kernel) {
   for (uint32_t n = 3; n < 4; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79859,12 +79859,12 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, n_div_2_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, n_div_2_small_kernel) {
   for (uint32_t n = 4; n <= 6; n += 2) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79876,12 +79876,12 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, strided_cm_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 2; n++) {
@@ -79895,13 +79895,13 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, a_offset) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, a_offset) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(4)
@@ -79913,11 +79913,11 @@
       .k(k)
       .ks(3)
       .a_offset(23)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, zero) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, zero) {
   for (uint32_t mz = 0; mz < 4; mz++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -79931,12 +79931,12 @@
         .ks(3)
         .a_offset(23)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, qmin) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -79946,10 +79946,10 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, qmax) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -79959,10 +79959,10 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_4X2__SCALAR, strided_cm) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(2)
@@ -79972,11 +79972,11 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, k_eq_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -79985,10 +79985,10 @@
     .m(1)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -79998,10 +79998,10 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 1; m++) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -80013,12 +80013,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 1; m++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -80029,11 +80029,11 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 4; n++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -80044,11 +80044,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, k_gt_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(1)
@@ -80058,11 +80058,11 @@
       .m(1)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, k_gt_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80075,13 +80075,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, n_gt_4) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_gt_4) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80092,12 +80092,12 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_gt_4_strided_cn) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80109,12 +80109,12 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_gt_4_subtile) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -80127,13 +80127,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, n_div_4) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_div_4) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80144,12 +80144,12 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, n_div_4_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_div_4_strided_cn) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80161,12 +80161,12 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, n_div_4_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_div_4_subtile) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 1; m++) {
@@ -80179,13 +80179,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, small_kernel) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(1)
@@ -80196,11 +80196,11 @@
       .n(4)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, small_kernel_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, small_kernel_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80214,13 +80214,13 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, n_gt_4_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_gt_4_small_kernel) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80232,12 +80232,12 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, n_div_4_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, n_div_4_small_kernel) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80249,12 +80249,12 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, strided_cm_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 1; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80268,13 +80268,13 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, a_offset) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, a_offset) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(1)
@@ -80286,11 +80286,11 @@
       .k(k)
       .ks(3)
       .a_offset(7)
-      .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, zero) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, zero) {
   for (uint32_t mz = 0; mz < 1; mz++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80304,12 +80304,12 @@
         .ks(3)
         .a_offset(7)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, qmin) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -80319,10 +80319,10 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, qmax) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -80332,10 +80332,10 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_1X4__SCALAR, strided_cm) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(1)
     .nr(4)
@@ -80345,11 +80345,11 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, k_eq_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -80358,10 +80358,10 @@
     .m(2)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -80371,10 +80371,10 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 2; m++) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -80386,12 +80386,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 2; m++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -80402,11 +80402,11 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 4; n++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -80417,11 +80417,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, k_gt_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(2)
@@ -80431,11 +80431,11 @@
       .m(2)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, k_gt_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80448,13 +80448,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, n_gt_4) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_gt_4) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80465,12 +80465,12 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_gt_4_strided_cn) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80482,12 +80482,12 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_gt_4_subtile) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -80500,13 +80500,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, n_div_4) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_div_4) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80517,12 +80517,12 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, n_div_4_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_div_4_strided_cn) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80534,12 +80534,12 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, n_div_4_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_div_4_subtile) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 2; m++) {
@@ -80552,13 +80552,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, small_kernel) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(2)
@@ -80569,11 +80569,11 @@
       .n(4)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, small_kernel_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, small_kernel_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80587,13 +80587,13 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, n_gt_4_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_gt_4_small_kernel) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80605,12 +80605,12 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, n_div_4_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, n_div_4_small_kernel) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80622,12 +80622,12 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, strided_cm_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 2; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80641,13 +80641,13 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, a_offset) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, a_offset) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(2)
@@ -80659,11 +80659,11 @@
       .k(k)
       .ks(3)
       .a_offset(13)
-      .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, zero) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, zero) {
   for (uint32_t mz = 0; mz < 2; mz++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80677,12 +80677,12 @@
         .ks(3)
         .a_offset(13)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, qmin) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -80692,10 +80692,10 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, qmax) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -80705,10 +80705,10 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_2X4__SCALAR, strided_cm) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(2)
     .nr(4)
@@ -80718,11 +80718,11 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, k_eq_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -80731,10 +80731,10 @@
     .m(3)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -80744,10 +80744,10 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, k_eq_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 3; m++) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -80759,12 +80759,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 3; m++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -80775,11 +80775,11 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 4; n++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -80790,11 +80790,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, k_gt_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(3)
@@ -80804,11 +80804,11 @@
       .m(3)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, k_gt_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80821,13 +80821,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, n_gt_4) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_gt_4) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80838,12 +80838,12 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, n_gt_4_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_gt_4_strided_cn) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80855,12 +80855,12 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, n_gt_4_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_gt_4_subtile) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -80873,13 +80873,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, n_div_4) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_div_4) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80890,12 +80890,12 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, n_div_4_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_div_4_strided_cn) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80907,12 +80907,12 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, n_div_4_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_div_4_subtile) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 3; m++) {
@@ -80925,13 +80925,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, small_kernel) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(3)
@@ -80942,11 +80942,11 @@
       .n(4)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, small_kernel_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, small_kernel_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -80960,13 +80960,13 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, n_gt_4_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_gt_4_small_kernel) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80978,12 +80978,12 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, n_div_4_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, n_div_4_small_kernel) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -80995,12 +80995,12 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, strided_cm_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 3; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -81014,13 +81014,13 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, a_offset) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, a_offset) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(3)
@@ -81032,11 +81032,11 @@
       .k(k)
       .ks(3)
       .a_offset(17)
-      .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, zero) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, zero) {
   for (uint32_t mz = 0; mz < 3; mz++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -81050,12 +81050,12 @@
         .ks(3)
         .a_offset(17)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, qmin) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -81065,10 +81065,10 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, qmax) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -81078,10 +81078,10 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_3X4__SCALAR, strided_cm) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(3)
     .nr(4)
@@ -81091,11 +81091,11 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, k_eq_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -81104,10 +81104,10 @@
     .m(4)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cn) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -81117,10 +81117,10 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_subtile) {
   for (uint32_t m = 1; m <= 4; m++) {
     for (uint32_t n = 1; n <= 4; n++) {
       GemmMicrokernelTester()
@@ -81132,12 +81132,12 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_m) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_subtile_m) {
   for (uint32_t m = 1; m <= 4; m++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -81148,11 +81148,11 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, k_eq_1_subtile_n) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_subtile_n) {
   for (uint32_t n = 1; n <= 4; n++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -81163,11 +81163,11 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, k_gt_1) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_gt_1) {
   for (size_t k = 2; k < 10; k++) {
     GemmMicrokernelTester()
       .mr(4)
@@ -81177,11 +81177,11 @@
       .m(4)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, k_gt_1_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_gt_1_subtile) {
   for (size_t k = 2; k < 10; k++) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -81194,13 +81194,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, n_gt_4) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_gt_4) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -81211,12 +81211,12 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_gt_4_strided_cn) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -81228,12 +81228,12 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_gt_4_subtile) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -81246,13 +81246,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, n_div_4) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_div_4) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -81263,12 +81263,12 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, n_div_4_strided_cn) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_div_4_strided_cn) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -81280,12 +81280,12 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, n_div_4_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_div_4_subtile) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       for (uint32_t m = 1; m <= 4; m++) {
@@ -81298,13 +81298,13 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, small_kernel) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(4)
@@ -81315,11 +81315,11 @@
       .n(4)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, small_kernel_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, small_kernel_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -81333,13 +81333,13 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, n_gt_4_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_gt_4_small_kernel) {
   for (uint32_t n = 5; n < 8; n++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -81351,12 +81351,12 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, n_div_4_small_kernel) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, n_div_4_small_kernel) {
   for (uint32_t n = 8; n <= 12; n += 4) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -81368,12 +81368,12 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, strided_cm_subtile) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cm_subtile) {
   for (size_t k = 1; k <= 5; k += 2) {
     for (uint32_t m = 1; m <= 4; m++) {
       for (uint32_t n = 1; n <= 4; n++) {
@@ -81387,13 +81387,13 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
       }
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, a_offset) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, a_offset) {
   for (size_t k = 1; k <= 5; k += 2) {
     GemmMicrokernelTester()
       .mr(4)
@@ -81405,11 +81405,11 @@
       .k(k)
       .ks(3)
       .a_offset(23)
-      .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, zero) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, zero) {
   for (uint32_t mz = 0; mz < 4; mz++) {
     for (size_t k = 1; k <= 5; k += 2) {
       GemmMicrokernelTester()
@@ -81423,12 +81423,12 @@
         .ks(3)
         .a_offset(23)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
     }
   }
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, qmin) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, qmin) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -81438,10 +81438,10 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, qmax) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, qmax) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -81451,10 +81451,10 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
 
-TEST(QS8_IGEMM_MINMAX_4X4__SCALAR, strided_cm) {
+TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cm) {
   GemmMicrokernelTester()
     .mr(4)
     .nr(4)
@@ -81464,5 +81464,5 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_gemm_scalar_params);
 }
diff --git a/test/qs8-igemm-minmax-gemmlowp.yaml b/test/qs8-igemm-minmax-gemmlowp.yaml
new file mode 100644
index 0000000..42f2158
--- /dev/null
+++ b/test/qs8-igemm-minmax-gemmlowp.yaml
@@ -0,0 +1,533 @@
+# Copyright 2020 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
+  init: xnn_init_qs8_gemm_neon_params
+  k-block: 16
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128
+  init: xnn_init_qs8_gemm_sse2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2
+  init: xnn_init_qs8_gemm_avx2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx
+  init: xnn_init_qs8_gemm_sse4_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128
+  init: xnn_init_qs8_gemm_wasmsimd_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
+- name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar
+  init: xnn_init_qs8_gemm_scalar_params
+  k-block: 1
diff --git a/test/qs8-igemm-minmax.yaml b/test/qs8-igemm-minmax.yaml
deleted file mode 100644
index eddebf6..0000000
--- a/test/qs8-igemm-minmax.yaml
+++ /dev/null
@@ -1,533 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-- name: xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_6x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_6x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x8__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x8__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x8__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x8__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x16__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x16__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16__neon_mull_addw_dup
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55
-  init: xnn_init_qs8_gemm_neon_params
-  k-block: 16
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c2__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c2__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128
-  init: xnn_init_qs8_gemm_sse2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__avx_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x8c8__avx2
-  init: xnn_init_qs8_gemm_avx2_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx
-  init: xnn_init_qs8_gemm_sse4_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_gemm_wasmsimd_params
-  k-block: 8
-- name: xnn_qs8_igemm_minmax_ukernel_1x2__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_ukernel_2x2__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_ukernel_3x2__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_ukernel_4x2__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_ukernel_1x4__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_ukernel_2x4__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_ukernel_3x4__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1
-- name: xnn_qs8_igemm_minmax_ukernel_4x4__scalar
-  init: xnn_init_qs8_gemm_scalar_params
-  k-block: 1